#!/usr/bin/env python3 """ Bootstrap compiler: reference SX evaluator -> Python. Reads the .sx reference specification and emits a standalone Python evaluator module (sx_ref.py) that can be compared against the hand-written evaluator.py / html.py / async_eval.py. The compiler translates the restricted SX subset used in eval.sx/render.sx into idiomatic Python. Platform interface functions are emitted as native Python implementations. Usage: python bootstrap_py.py > sx_ref.py """ from __future__ import annotations import os import sys # Add project root to path for imports _HERE = os.path.dirname(os.path.abspath(__file__)) _PROJECT = os.path.abspath(os.path.join(_HERE, "..", "..", "..")) sys.path.insert(0, _PROJECT) from shared.sx.parser import parse_all from shared.sx.types import Symbol, Keyword, NIL as SX_NIL # --------------------------------------------------------------------------- # SX -> Python transpiler # --------------------------------------------------------------------------- # Python reserved words — SX names that collide get _ suffix # Excludes names we intentionally shadow (list, dict, range, filter, map) _PY_RESERVED = frozenset({ "False", "None", "True", "and", "as", "assert", "async", "await", "break", "class", "continue", "def", "del", "elif", "else", "except", "finally", "for", "from", "global", "if", "import", "in", "is", "lambda", "nonlocal", "not", "or", "pass", "raise", "return", "try", "while", "with", "yield", # builtins we don't want to shadow "default", "type", "id", "input", "open", "print", "set", "super", }) class PyEmitter: """Transpile an SX AST node to Python source code.""" def __init__(self): self.indent = 0 self._async_names: set[str] = set() # SX names of define-async functions self._in_async: bool = False # Currently emitting async def body? 
def emit(self, expr) -> str: """Emit a Python expression from an SX AST node.""" # Bool MUST be checked before int (bool is subclass of int in Python) if isinstance(expr, bool): return "True" if expr else "False" if isinstance(expr, (int, float)): return str(expr) if isinstance(expr, str): return self._py_string(expr) if expr is None or expr is SX_NIL: return "NIL" if isinstance(expr, Symbol): return self._emit_symbol(expr.name) if isinstance(expr, Keyword): return self._py_string(expr.name) if isinstance(expr, dict): return self._emit_native_dict(expr) if isinstance(expr, list): return self._emit_list(expr) return str(expr) def emit_statement(self, expr, indent: int = 0) -> str: """Emit a Python statement from an SX AST node.""" pad = " " * indent if isinstance(expr, list) and expr: head = expr[0] if isinstance(head, Symbol): name = head.name if name == "define": return self._emit_define(expr, indent) if name == "define-async": return self._emit_define_async(expr, indent) if name == "set!": return f"{pad}{self._mangle(expr[1].name)} = {self.emit(expr[2])}" if name == "when": return self._emit_when_stmt(expr, indent) if name == "do" or name == "begin": return "\n".join(self.emit_statement(e, indent) for e in expr[1:]) if name == "for-each": return self._emit_for_each_stmt(expr, indent) if name == "dict-set!": return f"{pad}{self.emit(expr[1])}[{self.emit(expr[2])}] = {self.emit(expr[3])}" if name == "append!": return f"{pad}{self.emit(expr[1])}.append({self.emit(expr[2])})" if name == "env-set!": return f"{pad}{self.emit(expr[1])}[{self.emit(expr[2])}] = {self.emit(expr[3])}" if name == "set-lambda-name!": return f"{pad}{self.emit(expr[1])}.name = {self.emit(expr[2])}" return f"{pad}{self.emit(expr)}" # --- Symbol emission --- def _emit_symbol(self, name: str) -> str: mangled = self._mangle(name) cell_vars = getattr(self, '_current_cell_vars', set()) if mangled in cell_vars: return f"_cells[{self._py_string(mangled)}]" return mangled def _mangle(self, name: str) -> 
str: """Convert SX identifier to valid Python identifier.""" RENAMES = { "nil": "NIL", "true": "True", "false": "False", "nil?": "is_nil", "type-of": "type_of", "symbol-name": "symbol_name", "keyword-name": "keyword_name", "make-lambda": "make_lambda", "make-component": "make_component", "make-macro": "make_macro", "make-thunk": "make_thunk", "make-handler-def": "make_handler_def", "make-query-def": "make_query_def", "make-action-def": "make_action_def", "make-page-def": "make_page_def", "make-symbol": "make_symbol", "make-keyword": "make_keyword", "lambda-params": "lambda_params", "lambda-body": "lambda_body", "lambda-closure": "lambda_closure", "lambda-name": "lambda_name", "set-lambda-name!": "set_lambda_name", "component-params": "component_params", "component-body": "component_body", "component-closure": "component_closure", "component-has-children?": "component_has_children", "component-name": "component_name", "component-affinity": "component_affinity", "component-param-types": "component_param_types", "component-set-param-types!": "component_set_param_types", "macro-params": "macro_params", "macro-rest-param": "macro_rest_param", "macro-body": "macro_body", "macro-closure": "macro_closure", "thunk?": "is_thunk", "thunk-expr": "thunk_expr", "thunk-env": "thunk_env", "callable?": "is_callable", "lambda?": "is_lambda", "component?": "is_component", "island?": "is_island", "make-island": "make_island", "make-signal": "make_signal", "signal?": "is_signal", "signal-value": "signal_value", "signal-set-value!": "signal_set_value", "signal-subscribers": "signal_subscribers", "signal-add-sub!": "signal_add_sub", "signal-remove-sub!": "signal_remove_sub", "signal-deps": "signal_deps", "signal-set-deps!": "signal_set_deps", "set-tracking-context!": "set_tracking_context", "get-tracking-context": "get_tracking_context", "make-tracking-context": "make_tracking_context", "tracking-context-deps": "tracking_context_deps", "tracking-context-add-dep!": 
"tracking_context_add_dep", "tracking-context-notify-fn": "tracking_context_notify_fn", "identical?": "is_identical", "notify-subscribers": "notify_subscribers", "flush-subscribers": "flush_subscribers", "dispose-computed": "dispose_computed", "with-island-scope": "with_island_scope", "register-in-scope": "register_in_scope", "*batch-depth*": "_batch_depth", "*batch-queue*": "_batch_queue", "*island-scope*": "_island_scope", "*store-registry*": "_store_registry", "def-store": "def_store", "use-store": "use_store", "clear-stores": "clear_stores", "emit-event": "emit_event", "on-event": "on_event", "bridge-event": "bridge_event", "dom-listen": "dom_listen", "dom-dispatch": "dom_dispatch", "event-detail": "event_detail", "macro?": "is_macro", "primitive?": "is_primitive", "get-primitive": "get_primitive", "env-has?": "env_has", "env-get": "env_get", "env-set!": "env_set", "env-extend": "env_extend", "env-merge": "env_merge", "dict-set!": "dict_set", "dict-get": "dict_get", "dict-has?": "dict_has", "dict-delete!": "dict_delete", "eval-expr": "eval_expr", "eval-list": "eval_list", "eval-call": "eval_call", "is-render-expr?": "is_render_expr", "render-expr": "render_expr", "call-lambda": "call_lambda", "call-component": "call_component", "parse-keyword-args": "parse_keyword_args", "parse-comp-params": "parse_comp_params", "parse-macro-params": "parse_macro_params", "expand-macro": "expand_macro", "render-to-html": "render_to_html", "render-to-sx": "render_to_sx", "render-value-to-html": "render_value_to_html", "render-list-to-html": "render_list_to_html", "render-html-element": "render_html_element", "render-html-component": "render_html_component", "parse-element-args": "parse_element_args", "render-attrs": "render_attrs", "aser-list": "aser_list", "aser-fragment": "aser_fragment", "aser-call": "aser_call", "aser-special": "aser_special", "sf-if": "sf_if", "sf-when": "sf_when", "sf-cond": "sf_cond", "sf-cond-scheme": "sf_cond_scheme", "sf-cond-clojure": 
"sf_cond_clojure", "sf-case": "sf_case", "sf-case-loop": "sf_case_loop", "sf-and": "sf_and", "sf-or": "sf_or", "sf-let": "sf_let", "sf-lambda": "sf_lambda", "sf-define": "sf_define", "sf-defcomp": "sf_defcomp", "defcomp-kwarg": "defcomp_kwarg", "sf-defmacro": "sf_defmacro", "sf-begin": "sf_begin", "sf-quote": "sf_quote", "sf-quasiquote": "sf_quasiquote", "sf-thread-first": "sf_thread_first", "sf-set!": "sf_set_bang", "sf-reset": "sf_reset", "sf-shift": "sf_shift", "qq-expand": "qq_expand", "ho-map": "ho_map", "ho-map-indexed": "ho_map_indexed", "ho-filter": "ho_filter", "ho-reduce": "ho_reduce", "ho-some": "ho_some", "ho-every": "ho_every", "ho-for-each": "ho_for_each", "sf-defstyle": "sf_defstyle", "special-form?": "is_special_form", "ho-form?": "is_ho_form", "strip-prefix": "strip_prefix", "escape-html": "escape_html", "escape-attr": "escape_attr", "escape-string": "escape_string", "raw-html-content": "raw_html_content", "HTML_TAGS": "HTML_TAGS", "VOID_ELEMENTS": "VOID_ELEMENTS", "BOOLEAN_ATTRS": "BOOLEAN_ATTRS", # render.sx core "definition-form?": "is_definition_form", # adapter-html.sx "RENDER_HTML_FORMS": "RENDER_HTML_FORMS", "render-html-form?": "is_render_html_form", "dispatch-html-form": "dispatch_html_form", "render-lambda-html": "render_lambda_html", "make-raw-html": "make_raw_html", "render-html-island": "render_html_island", "serialize-island-state": "serialize_island_state", "json-serialize": "json_serialize", "empty-dict?": "is_empty_dict", "sf-defisland": "sf_defisland", # adapter-sx.sx "render-to-sx": "render_to_sx", # adapter-async.sx platform primitives "svg-context-set!": "svg_context_set", "svg-context-reset!": "svg_context_reset", "css-class-collect!": "css_class_collect", "is-raw-html?": "is_raw_html", "async-coroutine?": "is_async_coroutine", "async-await!": "async_await", "is-sx-expr?": "is_sx_expr", "sx-expr?": "is_sx_expr", "io-primitive?": "io_primitive_p", "expand-components?": "expand_components_p", "svg-context?": "svg_context_p", 
"make-sx-expr": "make_sx_expr", "aser": "aser", "eval-case-aser": "eval_case_aser", "sx-serialize": "sx_serialize", "sx-serialize-dict": "sx_serialize_dict", "sx-expr-source": "sx_expr_source", # Primitives that need exact aliases "contains?": "contains_p", "starts-with?": "starts_with_p", "ends-with?": "ends_with_p", "empty?": "empty_p", "every?": "every_p", "for-each": "for_each", "for-each-indexed": "for_each_indexed", "map-indexed": "map_indexed", "map-dict": "map_dict", "eval-cond": "eval_cond", "eval-cond-scheme": "eval_cond_scheme", "eval-cond-clojure": "eval_cond_clojure", "process-bindings": "process_bindings", # deps.sx "scan-refs": "scan_refs", "scan-refs-walk": "scan_refs_walk", "transitive-deps": "transitive_deps", "compute-all-deps": "compute_all_deps", "scan-components-from-source": "scan_components_from_source", "components-needed": "components_needed", "page-component-bundle": "page_component_bundle", "page-css-classes": "page_css_classes", "component-deps": "component_deps", "component-set-deps!": "component_set_deps", "component-css-classes": "component_css_classes", "component-io-refs": "component_io_refs", "component-set-io-refs!": "component_set_io_refs", "env-components": "env_components", "regex-find-all": "regex_find_all", "scan-css-classes": "scan_css_classes", # deps.sx IO detection "scan-io-refs": "scan_io_refs", "scan-io-refs-walk": "scan_io_refs_walk", "transitive-io-refs": "transitive_io_refs", "compute-all-io-refs": "compute_all_io_refs", "component-io-refs-cached": "component_io_refs_cached", "component-pure?": "component_pure_p", "render-target": "render_target", "page-render-plan": "page_render_plan", # router.sx "split-path-segments": "split_path_segments", "make-route-segment": "make_route_segment", "parse-route-pattern": "parse_route_pattern", "match-route-segments": "match_route_segments", "match-route": "match_route", "find-matching-route": "find_matching_route", } if name in RENAMES: return RENAMES[name] # General mangling 
result = name # Handle trailing ? and ! if result.endswith("?"): result = result[:-1] + "_p" elif result.endswith("!"): result = result[:-1] + "_b" # Kebab to snake_case result = result.replace("-", "_") # Escape Python reserved words if result in _PY_RESERVED: result = result + "_" return result # --- List emission --- def _emit_list(self, expr: list) -> str: if not expr: return "[]" head = expr[0] if not isinstance(head, Symbol): # Data list return "[" + ", ".join(self.emit(x) for x in expr) + "]" name = head.name handler = getattr( self, f"_sf_{name.replace('-', '_').replace('!', '_b').replace('?', '_p')}", None, ) if handler: return handler(expr) # Built-in forms if name in ("fn", "lambda"): return self._emit_fn(expr) if name in ("let", "let*"): return self._emit_let(expr) if name == "if": return self._emit_if(expr) if name == "when": return self._emit_when(expr) if name == "cond": return self._emit_cond(expr) if name == "case": return self._emit_case(expr) if name == "and": return self._emit_and(expr) if name == "or": return self._emit_or(expr) if name == "not": return f"(not sx_truthy({self.emit(expr[1])}))" if name in ("do", "begin"): return self._emit_do(expr) if name == "list": return "[" + ", ".join(self.emit(x) for x in expr[1:]) + "]" if name == "dict": return self._emit_dict_literal(expr) if name == "quote": return self._emit_quote(expr[1]) if name == "set!": # set! 
in expression context — use nonlocal_cells dict for mutation # from nested lambdas (Python closures can read but not rebind outer vars) varname = expr[1].name if isinstance(expr[1], Symbol) else str(expr[1]) py_var = self._mangle(varname) return f"_sx_cell_set(_cells, {self._py_string(py_var)}, {self.emit(expr[2])})" if name == "str": parts = [self.emit(x) for x in expr[1:]] return "sx_str(" + ", ".join(parts) + ")" # Mutation forms that can appear in expression context if name == "append!": return f"_sx_append({self.emit(expr[1])}, {self.emit(expr[2])})" if name == "dict-set!": return f"_sx_dict_set({self.emit(expr[1])}, {self.emit(expr[2])}, {self.emit(expr[3])})" if name == "env-set!": return f"_sx_dict_set({self.emit(expr[1])}, {self.emit(expr[2])}, {self.emit(expr[3])})" if name == "set-lambda-name!": return f"_sx_set_attr({self.emit(expr[1])}, 'name', {self.emit(expr[2])})" # Infix operators if name in ("+", "-", "*", "/", "=", "!=", "<", ">", "<=", ">=", "mod"): return self._emit_infix(name, expr[1:]) if name == "inc": return f"({self.emit(expr[1])} + 1)" if name == "dec": return f"({self.emit(expr[1])} - 1)" # Regular function call fn_name = self._mangle(name) args = ", ".join(self.emit(x) for x in expr[1:]) if self._in_async and name in self._async_names: return f"(await {fn_name}({args}))" return f"{fn_name}({args})" # --- Special form emitters --- def _emit_fn(self, expr) -> str: params = expr[1] body = expr[2:] param_names = [] rest_name = None i = 0 while i < len(params): p = params[i] if isinstance(p, Symbol) and p.name == "&rest": # Next param is the rest parameter if i + 1 < len(params): rest_name = self._mangle(params[i + 1].name if isinstance(params[i + 1], Symbol) else str(params[i + 1])) i += 2 continue else: i += 1 continue if isinstance(p, Symbol): param_names.append(self._mangle(p.name)) else: param_names.append(str(p)) i += 1 if rest_name: param_names.append(f"*{rest_name}") params_str = ", ".join(param_names) if len(body) == 1: body_py = 
self.emit(body[0]) return f"lambda {params_str}: {body_py}" # Multi-expression body: need a local function lines = [] lines.append(f"_sx_fn(lambda {params_str}: (") for b in body[:-1]: lines.append(f" {self.emit(b)},") lines.append(f" {self.emit(body[-1])}") lines.append(")[-1])") return "\n".join(lines) def _emit_let(self, expr) -> str: bindings = expr[1] body = expr[2:] assignments = [] if isinstance(bindings, list): if bindings and isinstance(bindings[0], list): # Scheme-style: ((name val) ...) for b in bindings: vname = b[0].name if isinstance(b[0], Symbol) else str(b[0]) assignments.append((self._mangle(vname), self.emit(b[1]))) else: # Clojure-style: (name val name val ...) for i in range(0, len(bindings), 2): vname = bindings[i].name if isinstance(bindings[i], Symbol) else str(bindings[i]) assignments.append((self._mangle(vname), self.emit(bindings[i + 1]))) # Nested IIFE for sequential let (each binding can see previous ones): # (lambda a: (lambda b: body)(val_b))(val_a) # Cell variables (mutated by nested set!) are initialized in _cells dict # instead of lambda params, since the body reads _cells[name]. 
cell_vars = getattr(self, '_current_cell_vars', set()) body_parts = [self.emit(b) for b in body] if len(body) == 1: body_str = body_parts[0] else: body_str = f"_sx_begin({', '.join(body_parts)})" # Build from inside out result = body_str for name, val in reversed(assignments): if name in cell_vars: # Cell var: initialize in _cells dict, not as lambda param result = f"_sx_begin(_sx_cell_set(_cells, {self._py_string(name)}, {val}), {result})" else: result = f"(lambda {name}: {result})({val})" return result def _emit_if(self, expr) -> str: cond = self.emit(expr[1]) then = self.emit(expr[2]) els = self.emit(expr[3]) if len(expr) > 3 else "NIL" return f"({then} if sx_truthy({cond}) else {els})" def _emit_when(self, expr) -> str: cond = self.emit(expr[1]) body_parts = expr[2:] if len(body_parts) == 1: return f"({self.emit(body_parts[0])} if sx_truthy({cond}) else NIL)" body = ", ".join(self.emit(b) for b in body_parts) return f"(_sx_begin({body}) if sx_truthy({cond}) else NIL)" def _emit_when_stmt(self, expr, indent: int = 0) -> str: pad = " " * indent cond = self.emit(expr[1]) body_parts = expr[2:] lines = [f"{pad}if sx_truthy({cond}):"] for b in body_parts: self._emit_stmt_recursive(b, lines, indent + 1) return "\n".join(lines) def _emit_cond(self, expr) -> str: clauses = expr[1:] if not clauses: return "NIL" # Check ALL clauses are 2-element lists (scheme-style). # Checking only the first is ambiguous — (nil? x) is a 2-element # function call, not a scheme clause ((test body)). 
is_scheme = ( all(isinstance(c, list) and len(c) == 2 for c in clauses) and not any(isinstance(c, Keyword) for c in clauses) ) if is_scheme: return self._cond_scheme(clauses) return self._cond_clojure(clauses) def _cond_scheme(self, clauses) -> str: if not clauses: return "NIL" clause = clauses[0] test = clause[0] body = clause[1] if isinstance(test, Symbol) and test.name in ("else", ":else"): return self.emit(body) if isinstance(test, Keyword) and test.name == "else": return self.emit(body) return f"({self.emit(body)} if sx_truthy({self.emit(test)}) else {self._cond_scheme(clauses[1:])})" def _cond_clojure(self, clauses) -> str: if len(clauses) < 2: return "NIL" test = clauses[0] body = clauses[1] if isinstance(test, Keyword) and test.name == "else": return self.emit(body) if isinstance(test, Symbol) and test.name in ("else", ":else"): return self.emit(body) return f"({self.emit(body)} if sx_truthy({self.emit(test)}) else {self._cond_clojure(clauses[2:])})" def _emit_case(self, expr) -> str: match_expr = self.emit(expr[1]) clauses = expr[2:] return f"_sx_case({match_expr}, [{self._case_pairs(clauses)}])" def _case_pairs(self, clauses) -> str: pairs = [] i = 0 while i < len(clauses) - 1: test = clauses[i] body = clauses[i + 1] if isinstance(test, Keyword) and test.name == "else": pairs.append(f"(None, lambda: {self.emit(body)})") elif isinstance(test, Symbol) and test.name in ("else", ":else"): pairs.append(f"(None, lambda: {self.emit(body)})") else: pairs.append(f"({self.emit(test)}, lambda: {self.emit(body)})") i += 2 return ", ".join(pairs) def _emit_and(self, expr) -> str: parts = [self.emit(x) for x in expr[1:]] if len(parts) == 1: return parts[0] # Use Python's native and for short-circuit evaluation. # Last value returned as-is; prior values tested with sx_truthy. 
# (and a b c) -> (a if not sx_truthy(a) else (b if not sx_truthy(b) else c)) result = parts[-1] for p in reversed(parts[:-1]): result = f"({p} if not sx_truthy({p}) else {result})" return result def _emit_or(self, expr) -> str: if len(expr) == 2: return self.emit(expr[1]) parts = [self.emit(x) for x in expr[1:]] # Use Python's short-circuit pattern: # (or a b c) -> (a if sx_truthy(a) else (b if sx_truthy(b) else c)) result = parts[-1] for p in reversed(parts[:-1]): result = f"({p} if sx_truthy({p}) else {result})" return result def _emit_do(self, expr) -> str: return self._emit_do_inner(expr[1:]) def _emit_do_inner(self, exprs) -> str: if len(exprs) == 1: return self.emit(exprs[0]) parts = [self.emit(e) for e in exprs] return "_sx_begin(" + ", ".join(parts) + ")" def _emit_native_dict(self, expr: dict) -> str: """Emit a native Python dict (from parser's {:key val} syntax).""" parts = [] for key, val in expr.items(): parts.append(f"{self._py_string(key)}: {self.emit(val)}") return "{" + ", ".join(parts) + "}" def _emit_dict_literal(self, expr) -> str: pairs = expr[1:] parts = [] i = 0 while i < len(pairs) - 1: key = pairs[i] val = pairs[i + 1] if isinstance(key, Keyword): parts.append(f"{self._py_string(key.name)}: {self.emit(val)}") else: parts.append(f"{self.emit(key)}: {self.emit(val)}") i += 2 return "{" + ", ".join(parts) + "}" def _emit_infix(self, op: str, args: list) -> str: PY_OPS = {"=": "==", "!=": "!=", "mod": "%"} py_op = PY_OPS.get(op, op) if len(args) == 1 and op == "-": return f"(-{self.emit(args[0])})" return f"({self.emit(args[0])} {py_op} {self.emit(args[1])})" def _emit_define(self, expr, indent: int = 0) -> str: pad = " " * indent name = expr[1].name if isinstance(expr[1], Symbol) else str(expr[1]) val_expr = expr[2] # Always emit fn-bodied defines as def statements for flat control flow if (isinstance(val_expr, list) and val_expr and isinstance(val_expr[0], Symbol) and val_expr[0].name in ("fn", "lambda")): return self._emit_define_as_def(name, 
val_expr, indent) val = self.emit(val_expr) return f"{pad}{self._mangle(name)} = {val}" def _emit_define_async(self, expr, indent: int = 0) -> str: """Emit a define-async form as an async def statement.""" name = expr[1].name if isinstance(expr[1], Symbol) else str(expr[1]) val_expr = expr[2] if (isinstance(val_expr, list) and val_expr and isinstance(val_expr[0], Symbol) and val_expr[0].name in ("fn", "lambda")): return self._emit_define_as_def(name, val_expr, indent, is_async=True) # Shouldn't happen — define-async should always wrap fn/lambda return self._emit_define(expr, indent) def _body_uses_set(self, fn_expr) -> bool: """Check if a fn expression's body (recursively) uses set!.""" def _has_set(node): if not isinstance(node, list) or not node: return False head = node[0] if isinstance(head, Symbol) and head.name == "set!": return True return any(_has_set(child) for child in node if isinstance(child, list)) body = fn_expr[2:] return any(_has_set(b) for b in body) def _emit_define_as_def(self, name: str, fn_expr, indent: int = 0, is_async: bool = False) -> str: """Emit a define with fn value as a proper def statement. This is used for functions that contain set! — Python closures can't rebind outer lambda params, so we need proper def + local variables. Variables mutated by set! from nested lambdas use a _cells dict. When is_async=True, emits 'async def' and sets _in_async so that calls to other async functions receive 'await'. 
""" pad = " " * indent params = fn_expr[1] body = fn_expr[2:] param_names = [] i = 0 while i < len(params): p = params[i] if isinstance(p, Symbol) and p.name == "&rest": if i + 1 < len(params): rest_name = self._mangle(params[i + 1].name if isinstance(params[i + 1], Symbol) else str(params[i + 1])) param_names.append(f"*{rest_name}") i += 2 continue else: i += 1 continue if isinstance(p, Symbol): param_names.append(self._mangle(p.name)) else: param_names.append(str(p)) i += 1 params_str = ", ".join(param_names) py_name = self._mangle(name) # Find set! target variables that are used from nested lambda scopes nested_set_vars = self._find_nested_set_vars(body) def_kw = "async def" if is_async else "def" lines = [f"{pad}{def_kw} {py_name}({params_str}):"] if nested_set_vars: lines.append(f"{pad} _cells = {{}}") # Emit body with cell var tracking (and async context if needed) old_cells = getattr(self, '_current_cell_vars', set()) old_async = self._in_async self._current_cell_vars = nested_set_vars if is_async: self._in_async = True self._emit_body_stmts(body, lines, indent + 1) self._current_cell_vars = old_cells self._in_async = old_async return "\n".join(lines) def _find_nested_set_vars(self, body) -> set[str]: """Find variable names that are set! from within nested fn/lambda bodies.""" result = set() def _scan(node, in_nested_fn=False): if not isinstance(node, list) or not node: return head = node[0] if isinstance(head, Symbol): if head.name in ("fn", "lambda") and in_nested_fn: # Already nested, keep scanning for child in node[2:]: _scan(child, True) return if head.name in ("fn", "lambda"): # Entering nested fn for child in node[2:]: _scan(child, True) return if head.name == "set!" 
and in_nested_fn: var = node[1].name if isinstance(node[1], Symbol) else str(node[1]) result.add(self._mangle(var)) for child in node: if isinstance(child, list): _scan(child, in_nested_fn) for b in body: _scan(b) return result def _emit_body_stmts(self, body: list, lines: list, indent: int) -> None: """Emit body expressions as statements into lines list. Handles let as local variable declarations, and returns the last expression. Control flow in tail position (if, cond, case, when) is flattened to if/elif statements with returns in each branch. """ pad = " " * indent for i, expr in enumerate(body): is_last = (i == len(body) - 1) if isinstance(expr, list) and expr and isinstance(expr[0], Symbol): name = expr[0].name if name in ("let", "let*"): self._emit_let_as_stmts(expr, lines, indent, is_last) continue if name in ("do", "begin"): sub_body = expr[1:] if is_last: self._emit_body_stmts(sub_body, lines, indent) else: for sub in sub_body: lines.append(self.emit_statement(sub, indent)) continue if is_last: self._emit_return_expr(expr, lines, indent) else: self._emit_stmt_recursive(expr, lines, indent) def _emit_return_expr(self, expr, lines: list, indent: int) -> None: """Emit an expression in return position, flattening control flow.""" pad = " " * indent if isinstance(expr, list) and expr and isinstance(expr[0], Symbol): name = expr[0].name if name == "if": self._emit_if_return(expr, lines, indent) return if name == "cond": self._emit_cond_return(expr, lines, indent) return if name == "case": self._emit_case_return(expr, lines, indent) return if name == "when": self._emit_when_return(expr, lines, indent) return if name in ("let", "let*"): self._emit_let_as_stmts(expr, lines, indent, True) return if name in ("do", "begin"): self._emit_body_stmts(expr[1:], lines, indent) return if name == "for-each": # for-each in return position: emit as statement, return NIL lines.append(self._emit_for_each_stmt(expr, indent)) lines.append(f"{pad}return NIL") return 
lines.append(f"{pad}return {self.emit(expr)}") def _emit_if_return(self, expr, lines: list, indent: int) -> None: """Emit if as statement with returns in each branch.""" pad = " " * indent lines.append(f"{pad}if sx_truthy({self.emit(expr[1])}):") self._emit_return_expr(expr[2], lines, indent + 1) if len(expr) > 3: lines.append(f"{pad}else:") self._emit_return_expr(expr[3], lines, indent + 1) else: lines.append(f"{pad}return NIL") def _emit_when_return(self, expr, lines: list, indent: int) -> None: """Emit when as statement with return in body, else return NIL.""" pad = " " * indent lines.append(f"{pad}if sx_truthy({self.emit(expr[1])}):") body_parts = expr[2:] if len(body_parts) == 1: self._emit_return_expr(body_parts[0], lines, indent + 1) else: for b in body_parts[:-1]: lines.append(self.emit_statement(b, indent + 1)) self._emit_return_expr(body_parts[-1], lines, indent + 1) lines.append(f"{pad}return NIL") def _emit_cond_return(self, expr, lines: list, indent: int) -> None: """Emit cond as if/elif/else with returns in each branch.""" pad = " " * indent clauses = expr[1:] if not clauses: lines.append(f"{pad}return NIL") return is_scheme = ( all(isinstance(c, list) and len(c) == 2 for c in clauses) and not any(isinstance(c, Keyword) for c in clauses) ) has_else = False first_clause = True if is_scheme: for clause in clauses: test, body = clause[0], clause[1] if ((isinstance(test, Symbol) and test.name in ("else", ":else")) or (isinstance(test, Keyword) and test.name == "else")): lines.append(f"{pad}else:") has_else = True else: kw = "if" if first_clause else "elif" lines.append(f"{pad}{kw} sx_truthy({self.emit(test)}):") first_clause = False self._emit_return_expr(body, lines, indent + 1) else: i = 0 while i < len(clauses) - 1: test, body = clauses[i], clauses[i + 1] if ((isinstance(test, Keyword) and test.name == "else") or (isinstance(test, Symbol) and test.name in ("else", ":else"))): lines.append(f"{pad}else:") has_else = True else: kw = "if" if first_clause 
else "elif" lines.append(f"{pad}{kw} sx_truthy({self.emit(test)}):") first_clause = False self._emit_return_expr(body, lines, indent + 1) i += 2 if not has_else: lines.append(f"{pad}return NIL") def _emit_case_return(self, expr, lines: list, indent: int) -> None: """Emit case as if/elif/else with returns in each branch.""" pad = " " * indent match_val = self.emit(expr[1]) clauses = expr[2:] lines.append(f"{pad}_match = {match_val}") has_else = False first_clause = True i = 0 while i < len(clauses) - 1: test = clauses[i] body = clauses[i + 1] if ((isinstance(test, Keyword) and test.name == "else") or (isinstance(test, Symbol) and test.name in ("else", ":else"))): lines.append(f"{pad}else:") has_else = True else: kw = "if" if first_clause else "elif" lines.append(f"{pad}{kw} _match == {self.emit(test)}:") first_clause = False self._emit_return_expr(body, lines, indent + 1) i += 2 if not has_else: lines.append(f"{pad}return NIL") def _emit_let_as_stmts(self, expr, lines: list, indent: int, is_last: bool) -> None: """Emit a let expression as local variable declarations.""" pad = " " * indent bindings = expr[1] body = expr[2:] cell_vars = getattr(self, '_current_cell_vars', set()) if isinstance(bindings, list): if bindings and isinstance(bindings[0], list): # Scheme-style: ((name val) ...) for b in bindings: vname = b[0].name if isinstance(b[0], Symbol) else str(b[0]) mangled = self._mangle(vname) if mangled in cell_vars: lines.append(f"{pad}_cells[{self._py_string(mangled)}] = {self.emit(b[1])}") else: lines.append(f"{pad}{mangled} = {self.emit(b[1])}") else: # Clojure-style: (name val name val ...) 
for j in range(0, len(bindings), 2): vname = bindings[j].name if isinstance(bindings[j], Symbol) else str(bindings[j]) mangled = self._mangle(vname) if mangled in cell_vars: lines.append(f"{pad}_cells[{self._py_string(mangled)}] = {self.emit(bindings[j + 1])}") else: lines.append(f"{pad}{mangled} = {self.emit(bindings[j + 1])}") if is_last: self._emit_body_stmts(body, lines, indent) else: for b in body: self._emit_stmt_recursive(b, lines, indent) def _emit_for_each_stmt(self, expr, indent: int = 0) -> str: pad = " " * indent fn_expr = expr[1] coll_expr = expr[2] coll = self.emit(coll_expr) # If fn is an inline lambda, emit a for loop if isinstance(fn_expr, list) and isinstance(fn_expr[0], Symbol) and fn_expr[0].name == "fn": params = fn_expr[1] body = fn_expr[2:] p = params[0].name if isinstance(params[0], Symbol) else str(params[0]) p_py = self._mangle(p) lines = [f"{pad}for {p_py} in {coll}:"] # Emit body as statements with proper let/set! handling self._emit_loop_body(body, lines, indent + 1) return "\n".join(lines) fn = self.emit(fn_expr) return f"{pad}for _item in {coll}:\n{pad} {fn}(_item)" def _emit_loop_body(self, body: list, lines: list, indent: int) -> None: """Emit loop body as statements. 
Handles let, when, set!, cond properly.""" pad = " " * indent for expr in body: self._emit_stmt_recursive(expr, lines, indent) def _emit_stmt_recursive(self, expr, lines: list, indent: int) -> None: """Emit an expression as statement(s), recursing into control flow.""" pad = " " * indent if not isinstance(expr, list) or not expr: lines.append(self.emit_statement(expr, indent)) return head = expr[0] if not isinstance(head, Symbol): lines.append(self.emit_statement(expr, indent)) return name = head.name if name == "set!": varname = expr[1].name if isinstance(expr[1], Symbol) else str(expr[1]) mangled = self._mangle(varname) cell_vars = getattr(self, '_current_cell_vars', set()) if mangled in cell_vars: lines.append(f"{pad}_cells[{self._py_string(mangled)}] = {self.emit(expr[2])}") else: lines.append(f"{pad}{mangled} = {self.emit(expr[2])}") elif name in ("let", "let*"): self._emit_let_as_stmts(expr, lines, indent, False) elif name == "when": cond = self.emit(expr[1]) lines.append(f"{pad}if sx_truthy({cond}):") for b in expr[2:]: self._emit_stmt_recursive(b, lines, indent + 1) elif name == "cond": self._emit_cond_stmt(expr, lines, indent) elif name in ("do", "begin"): for b in expr[1:]: self._emit_stmt_recursive(b, lines, indent) elif name == "if": cond = self.emit(expr[1]) lines.append(f"{pad}if sx_truthy({cond}):") self._emit_stmt_recursive(expr[2], lines, indent + 1) if len(expr) > 3: lines.append(f"{pad}else:") self._emit_stmt_recursive(expr[3], lines, indent + 1) elif name == "append!": lines.append(f"{pad}{self.emit(expr[1])}.append({self.emit(expr[2])})") elif name == "dict-set!": lines.append(f"{pad}{self.emit(expr[1])}[{self.emit(expr[2])}] = {self.emit(expr[3])}") elif name == "env-set!": lines.append(f"{pad}{self.emit(expr[1])}[{self.emit(expr[2])}] = {self.emit(expr[3])}") else: lines.append(self.emit_statement(expr, indent)) def _emit_cond_stmt(self, expr, lines: list, indent: int) -> None: """Emit cond as if/elif/else chain.""" pad = " " * indent clauses 
= expr[1:] # Detect scheme vs clojure style is_scheme = ( all(isinstance(c, list) and len(c) == 2 for c in clauses) and not any(isinstance(c, Keyword) for c in clauses) ) first_clause = True if is_scheme: for clause in clauses: test, body = clause[0], clause[1] if isinstance(test, Symbol) and test.name in ("else", ":else"): lines.append(f"{pad}else:") elif isinstance(test, Keyword) and test.name == "else": lines.append(f"{pad}else:") else: kw = "if" if first_clause else "elif" lines.append(f"{pad}{kw} sx_truthy({self.emit(test)}):") first_clause = False self._emit_stmt_recursive(body, lines, indent + 1) else: i = 0 while i < len(clauses) - 1: test, body = clauses[i], clauses[i + 1] if isinstance(test, Keyword) and test.name == "else": lines.append(f"{pad}else:") elif isinstance(test, Symbol) and test.name in ("else", ":else"): lines.append(f"{pad}else:") else: kw = "if" if first_clause else "elif" lines.append(f"{pad}{kw} sx_truthy({self.emit(test)}):") first_clause = False self._emit_stmt_recursive(body, lines, indent + 1) i += 2 def _emit_quote(self, expr) -> str: """Emit a quoted expression as a Python literal AST.""" if isinstance(expr, bool): return "True" if expr else "False" if isinstance(expr, (int, float)): return str(expr) if isinstance(expr, str): return self._py_string(expr) if expr is None or expr is SX_NIL: return "NIL" if isinstance(expr, Symbol): return f"Symbol({self._py_string(expr.name)})" if isinstance(expr, Keyword): return f"Keyword({self._py_string(expr.name)})" if isinstance(expr, list): return "[" + ", ".join(self._emit_quote(x) for x in expr) + "]" return str(expr) def _py_string(self, s: str) -> str: return repr(s) # --------------------------------------------------------------------------- # Bootstrap compiler # --------------------------------------------------------------------------- def extract_defines(source: str) -> list[tuple[str, list]]: """Parse .sx source, return list of (name, define-expr) for top-level defines. 
Extracts both (define ...) and (define-async ...) forms. """ exprs = parse_all(source) defines = [] for expr in exprs: if isinstance(expr, list) and expr and isinstance(expr[0], Symbol): if expr[0].name in ("define", "define-async"): name = expr[1].name if isinstance(expr[1], Symbol) else str(expr[1]) defines.append((name, expr)) return defines # Build config and static platform sections — canonical source is platform_py.py try: from .platform_py import ( PREAMBLE, PLATFORM_PY, PRIMITIVES_PY_PRE, PRIMITIVES_PY_POST, PRIMITIVES_PY_MODULES, _ALL_PY_MODULES, PLATFORM_DEPS_PY, PLATFORM_ASYNC_PY, FIXUPS_PY, CONTINUATIONS_PY, _assemble_primitives_py, public_api_py, ADAPTER_FILES, SPEC_MODULES, EXTENSION_NAMES, EXTENSION_FORMS, ) except ImportError: from shared.sx.ref.platform_py import ( PREAMBLE, PLATFORM_PY, PRIMITIVES_PY_PRE, PRIMITIVES_PY_POST, PRIMITIVES_PY_MODULES, _ALL_PY_MODULES, PLATFORM_DEPS_PY, PLATFORM_ASYNC_PY, FIXUPS_PY, CONTINUATIONS_PY, _assemble_primitives_py, public_api_py, ADAPTER_FILES, SPEC_MODULES, EXTENSION_NAMES, EXTENSION_FORMS, ) def _parse_special_forms_spec(ref_dir: str) -> set[str]: """Parse special-forms.sx to extract declared form names.""" filepath = os.path.join(ref_dir, "special-forms.sx") if not os.path.exists(filepath): return set() with open(filepath) as f: src = f.read() names = set() for expr in parse_all(src): if (isinstance(expr, list) and len(expr) >= 2 and isinstance(expr[0], Symbol) and expr[0].name == "define-special-form" and isinstance(expr[1], str)): names.add(expr[1]) return names def _extract_eval_dispatch_names(all_sections: list) -> set[str]: """Extract special form names dispatched in eval-list from transpiled sections.""" names = set() for _label, defines in all_sections: for name, _expr in defines: if name.startswith("sf-"): form = name[3:] if form in ("cond-scheme", "cond-clojure", "case-loop"): continue names.add(form) if name.startswith("ho-"): form = name[3:] names.add(form) return names def 
_validate_special_forms(ref_dir: str, all_sections: list, has_continuations: bool) -> None: """Cross-check special-forms.sx against eval.sx dispatch. Warn on mismatches.""" spec_names = _parse_special_forms_spec(ref_dir) if not spec_names: return dispatch_names = _extract_eval_dispatch_names(all_sections) if has_continuations: dispatch_names |= EXTENSION_FORMS["continuations"] name_aliases = { "thread-first": "->", "every": "every?", "set-bang": "set!", } normalized_dispatch = set() for n in dispatch_names: normalized_dispatch.add(name_aliases.get(n, n)) internal = {"named-let"} normalized_dispatch -= internal undispatched = spec_names - normalized_dispatch ignore = {"fn", "let*", "do", "defrelation"} undispatched -= ignore unspecced = normalized_dispatch - spec_names unspecced -= ignore if undispatched: import sys print(f"# WARNING: special-forms.sx declares forms not in eval.sx: " f"{', '.join(sorted(undispatched))}", file=sys.stderr) if unspecced: import sys print(f"# WARNING: eval.sx dispatches forms not in special-forms.sx: " f"{', '.join(sorted(unspecced))}", file=sys.stderr) def compile_ref_to_py( adapters: list[str] | None = None, modules: list[str] | None = None, extensions: list[str] | None = None, spec_modules: list[str] | None = None, ) -> str: """Read reference .sx files and emit Python. Args: adapters: List of adapter names to include. Valid names: html, sx. None = include all server-side adapters. modules: List of primitive module names to include. core.* are always included. stdlib.* are opt-in. None = include all modules (backward compatible). extensions: List of optional extensions to include. Valid names: continuations. None = no extensions. spec_modules: List of spec module names to include. Valid names: deps, engine. None = no spec modules. 
""" # Determine which primitive modules to include prim_modules = None # None = all if modules is not None: prim_modules = [m for m in _ALL_PY_MODULES if m.startswith("core.")] for m in modules: if m not in prim_modules: if m not in PRIMITIVES_PY_MODULES: raise ValueError(f"Unknown module: {m!r}. Valid: {', '.join(PRIMITIVES_PY_MODULES)}") prim_modules.append(m) ref_dir = os.path.dirname(os.path.abspath(__file__)) emitter = PyEmitter() # Resolve adapter set if adapters is None: adapter_set = set(ADAPTER_FILES.keys()) else: adapter_set = set() for a in adapters: if a not in ADAPTER_FILES: raise ValueError(f"Unknown adapter: {a!r}. Valid: {', '.join(ADAPTER_FILES)}") adapter_set.add(a) # Resolve spec modules spec_mod_set = set() if spec_modules: for sm in spec_modules: if sm not in SPEC_MODULES: raise ValueError(f"Unknown spec module: {sm!r}. Valid: {', '.join(SPEC_MODULES)}") spec_mod_set.add(sm) # html adapter needs deps (component analysis) and signals (island rendering) if "html" in adapter_set: if "deps" in SPEC_MODULES: spec_mod_set.add("deps") if "signals" in SPEC_MODULES: spec_mod_set.add("signals") if "page-helpers" in SPEC_MODULES: spec_mod_set.add("page-helpers") has_deps = "deps" in spec_mod_set # Core files always included, then selected adapters, then spec modules sx_files = [ ("eval.sx", "eval"), ("forms.sx", "forms (server definition forms)"), ("render.sx", "render (core)"), ] for name in ("html", "sx"): if name in adapter_set: sx_files.append(ADAPTER_FILES[name]) for name in sorted(spec_mod_set): sx_files.append(SPEC_MODULES[name]) # Pre-scan define-async names (needed before transpilation so emitter # knows which calls require 'await') has_async = "async" in adapter_set if has_async: async_filename = ADAPTER_FILES["async"][0] async_filepath = os.path.join(ref_dir, async_filename) if os.path.exists(async_filepath): with open(async_filepath) as f: async_src = f.read() for aexpr in parse_all(async_src): if (isinstance(aexpr, list) and aexpr and 
isinstance(aexpr[0], Symbol) and aexpr[0].name == "define-async"): aname = aexpr[1].name if isinstance(aexpr[1], Symbol) else str(aexpr[1]) emitter._async_names.add(aname) # Platform async primitives (provided by host, also need await) emitter._async_names.update({ "async-eval", "execute-io", "async-await!", }) # Async adapter is transpiled last (after sync adapters) sx_files.append(ADAPTER_FILES["async"]) all_sections = [] for filename, label in sx_files: filepath = os.path.join(ref_dir, filename) if not os.path.exists(filepath): continue with open(filepath) as f: src = f.read() defines = extract_defines(src) all_sections.append((label, defines)) # Resolve extensions ext_set = set() if extensions: for e in extensions: if e not in EXTENSION_NAMES: raise ValueError(f"Unknown extension: {e!r}. Valid: {', '.join(EXTENSION_NAMES)}") ext_set.add(e) has_continuations = "continuations" in ext_set # Validate special forms _validate_special_forms(ref_dir, all_sections, has_continuations) # Build output has_html = "html" in adapter_set has_sx = "sx" in adapter_set parts = [] parts.append(PREAMBLE) parts.append(PLATFORM_PY) parts.append(PRIMITIVES_PY_PRE) parts.append(_assemble_primitives_py(prim_modules)) parts.append(PRIMITIVES_PY_POST) if has_deps: parts.append(PLATFORM_DEPS_PY) if has_async: parts.append(PLATFORM_ASYNC_PY) for label, defines in all_sections: parts.append(f"\n# === Transpiled from {label} ===\n") for name, expr in defines: parts.append(f"# {name}") parts.append(emitter.emit_statement(expr)) parts.append("") parts.append(FIXUPS_PY) if has_continuations: parts.append(CONTINUATIONS_PY) parts.append(public_api_py(has_html, has_sx, has_deps, has_async)) return "\n".join(parts) # NOTE: Static platform sections (PREAMBLE, PLATFORM_PY, PRIMITIVES_*, etc.) # are now imported from platform_py.py above. Do not redefine them here. 
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------

def _split_csv(value):
    """Split a comma-separated option value into a list of names.

    Returns None when the option was not given (None or empty string), so
    the compiler's own defaults apply.  Whitespace around each name is
    stripped, so ``"html, sx"`` yields ``["html", "sx"]``.  Empty items are
    kept and left for the compiler's name validation to reject.
    """
    if not value:
        return None
    return [item.strip() for item in value.split(",")]


def main(argv=None) -> None:
    """Command-line entry point: compile the spec and print the Python module.

    Args:
        argv: Argument list to parse instead of ``sys.argv[1:]``.  None
            (the default) makes argparse read the process arguments.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Bootstrap SX spec -> Python")
    parser.add_argument(
        "--adapters",
        default=None,
        help="Comma-separated adapter names (html,sx). Default: all server-side.",
    )
    parser.add_argument(
        "--modules",
        default=None,
        help="Comma-separated primitive modules (core.* always included). Default: all.",
    )
    parser.add_argument(
        "--extensions",
        default=None,
        help="Comma-separated extensions (continuations). Default: none.",
    )
    parser.add_argument(
        "--spec-modules",
        default=None,
        help="Comma-separated spec modules (deps,engine). Default: none.",
    )
    args = parser.parse_args(argv)

    adapters = _split_csv(args.adapters)
    modules = _split_csv(args.modules)
    extensions = _split_csv(args.extensions)
    spec_modules = _split_csv(args.spec_modules)
    print(compile_ref_to_py(adapters, modules, extensions, spec_modules))


if __name__ == "__main__":
    main()