#!/usr/bin/env python3 """ Bootstrap compiler: reference SX evaluator -> Python. Reads the .sx reference specification and emits a standalone Python evaluator module (sx_ref.py) that can be compared against the hand-written evaluator.py / html.py / async_eval.py. The compiler translates the restricted SX subset used in eval.sx/render.sx into idiomatic Python. Platform interface functions are emitted as native Python implementations. Usage: python bootstrap_py.py > sx_ref.py """ from __future__ import annotations import os import sys # Add project root to path for imports _HERE = os.path.dirname(os.path.abspath(__file__)) _PROJECT = os.path.abspath(os.path.join(_HERE, "..", "..", "..")) sys.path.insert(0, _PROJECT) from shared.sx.parser import parse_all from shared.sx.types import Symbol, Keyword, NIL as SX_NIL # --------------------------------------------------------------------------- # SX -> Python transpiler # --------------------------------------------------------------------------- # Python reserved words — SX names that collide get _ suffix # Excludes names we intentionally shadow (list, dict, range, filter, map) _PY_RESERVED = frozenset({ "False", "None", "True", "and", "as", "assert", "async", "await", "break", "class", "continue", "def", "del", "elif", "else", "except", "finally", "for", "from", "global", "if", "import", "in", "is", "lambda", "nonlocal", "not", "or", "pass", "raise", "return", "try", "while", "with", "yield", # builtins we don't want to shadow "default", "type", "id", "input", "open", "print", "set", "super", }) class PyEmitter: """Transpile an SX AST node to Python source code.""" def __init__(self): self.indent = 0 def emit(self, expr) -> str: """Emit a Python expression from an SX AST node.""" # Bool MUST be checked before int (bool is subclass of int in Python) if isinstance(expr, bool): return "True" if expr else "False" if isinstance(expr, (int, float)): return str(expr) if isinstance(expr, str): return self._py_string(expr) if expr is None or expr is SX_NIL: return "NIL" if isinstance(expr, Symbol): return self._emit_symbol(expr.name) if isinstance(expr, Keyword): return self._py_string(expr.name) if isinstance(expr, dict): return self._emit_native_dict(expr) if isinstance(expr, list): return self._emit_list(expr) return str(expr) def emit_statement(self, expr, indent: int = 0) -> str: """Emit a Python statement from an SX AST node.""" pad = " " * indent if isinstance(expr, list) and expr: head = expr[0] if isinstance(head, Symbol): name = head.name if name == "define": return self._emit_define(expr, indent) if name == "set!": return f"{pad}{self._mangle(expr[1].name)} = {self.emit(expr[2])}" if name == "when": return self._emit_when_stmt(expr, indent) if name == "do" or name == "begin": return "\n".join(self.emit_statement(e, indent) for e in expr[1:]) if name == "for-each": return self._emit_for_each_stmt(expr, indent) if name == "dict-set!": return f"{pad}{self.emit(expr[1])}[{self.emit(expr[2])}] = {self.emit(expr[3])}" if name == "append!": return f"{pad}{self.emit(expr[1])}.append({self.emit(expr[2])})" if name == "env-set!": return f"{pad}{self.emit(expr[1])}[{self.emit(expr[2])}] = {self.emit(expr[3])}" if name == "set-lambda-name!": return f"{pad}{self.emit(expr[1])}.name = {self.emit(expr[2])}" return f"{pad}{self.emit(expr)}" # --- Symbol emission --- def _emit_symbol(self, name: str) -> str: mangled = self._mangle(name) cell_vars = getattr(self, '_current_cell_vars', set()) if mangled in cell_vars: return f"_cells[{self._py_string(mangled)}]" return mangled def _mangle(self, name: str) -> str: """Convert SX identifier to valid Python identifier.""" RENAMES = { "nil": "NIL", "true": "True", "false": "False", "nil?": "is_nil", "type-of": "type_of", "symbol-name": "symbol_name", "keyword-name": "keyword_name", "make-lambda": "make_lambda", "make-component": "make_component", "make-macro": "make_macro", "make-thunk": "make_thunk", "make-handler-def": "make_handler_def", "make-query-def": "make_query_def", "make-action-def": "make_action_def", "make-page-def": "make_page_def", "make-symbol": "make_symbol", "make-keyword": "make_keyword", "lambda-params": "lambda_params", "lambda-body": "lambda_body", "lambda-closure": "lambda_closure", "lambda-name": "lambda_name", "set-lambda-name!": "set_lambda_name", "component-params": "component_params", "component-body": "component_body", "component-closure": "component_closure", "component-has-children?": "component_has_children", "component-name": "component_name", "component-affinity": "component_affinity", "macro-params": "macro_params", "macro-rest-param": "macro_rest_param", "macro-body": "macro_body", "macro-closure": "macro_closure", "thunk?": "is_thunk", "thunk-expr": "thunk_expr", "thunk-env": "thunk_env", "callable?": "is_callable", "lambda?": "is_lambda", "component?": "is_component", "island?": "is_island", "make-island": "make_island", "make-signal": "make_signal", "signal?": "is_signal", "signal-value": "signal_value", "signal-set-value!": "signal_set_value", "signal-subscribers": "signal_subscribers", "signal-add-sub!": "signal_add_sub", "signal-remove-sub!": "signal_remove_sub", "signal-deps": "signal_deps", "signal-set-deps!": "signal_set_deps", "set-tracking-context!": "set_tracking_context", "get-tracking-context": "get_tracking_context", "make-tracking-context": "make_tracking_context", "tracking-context-deps": "tracking_context_deps", "tracking-context-add-dep!": "tracking_context_add_dep", "tracking-context-notify-fn": "tracking_context_notify_fn", "identical?": "is_identical", "notify-subscribers": "notify_subscribers", "flush-subscribers": "flush_subscribers", "dispose-computed": "dispose_computed", "with-island-scope": "with_island_scope", "register-in-scope": "register_in_scope", "*batch-depth*": "_batch_depth", "*batch-queue*": "_batch_queue", "*island-scope*": "_island_scope", "*store-registry*": "_store_registry", "def-store": "def_store", "use-store": "use_store", "clear-stores": "clear_stores", "emit-event": "emit_event", "on-event": "on_event", "bridge-event": "bridge_event", "dom-listen": "dom_listen", "dom-dispatch": "dom_dispatch", "event-detail": "event_detail", "macro?": "is_macro", "primitive?": "is_primitive", "get-primitive": "get_primitive", "env-has?": "env_has", "env-get": "env_get", "env-set!": "env_set", "env-extend": "env_extend", "env-merge": "env_merge", "dict-set!": "dict_set", "dict-get": "dict_get", "dict-has?": "dict_has", "dict-delete!": "dict_delete", "eval-expr": "eval_expr", "eval-list": "eval_list", "eval-call": "eval_call", "is-render-expr?": "is_render_expr", "render-expr": "render_expr", "call-lambda": "call_lambda", "call-component": "call_component", "parse-keyword-args": "parse_keyword_args", "parse-comp-params": "parse_comp_params", "parse-macro-params": "parse_macro_params", "expand-macro": "expand_macro", "render-to-html": "render_to_html", "render-to-sx": "render_to_sx", "render-value-to-html": "render_value_to_html", "render-list-to-html": "render_list_to_html", "render-html-element": "render_html_element", "render-html-component": "render_html_component", "parse-element-args": "parse_element_args", "render-attrs": "render_attrs", "aser-list": "aser_list", "aser-fragment": "aser_fragment", "aser-call": "aser_call", "aser-special": "aser_special", "sf-if": "sf_if", "sf-when": "sf_when", "sf-cond": "sf_cond", "sf-cond-scheme": "sf_cond_scheme", "sf-cond-clojure": "sf_cond_clojure", "sf-case": "sf_case", "sf-case-loop": "sf_case_loop", "sf-and": "sf_and", "sf-or": "sf_or", "sf-let": "sf_let", "sf-lambda": "sf_lambda", "sf-define": "sf_define", "sf-defcomp": "sf_defcomp", "defcomp-kwarg": "defcomp_kwarg", "sf-defmacro": "sf_defmacro", "sf-begin": "sf_begin", "sf-quote": "sf_quote", "sf-quasiquote": "sf_quasiquote", "sf-thread-first": "sf_thread_first", "sf-set!": "sf_set_bang", "sf-reset": "sf_reset", "sf-shift": "sf_shift", "qq-expand": "qq_expand", "ho-map": "ho_map", "ho-map-indexed": "ho_map_indexed", "ho-filter": "ho_filter", "ho-reduce": "ho_reduce", "ho-some": "ho_some", "ho-every": "ho_every", "ho-for-each": "ho_for_each", "sf-defstyle": "sf_defstyle", "special-form?": "is_special_form", "ho-form?": "is_ho_form", "strip-prefix": "strip_prefix", "escape-html": "escape_html", "escape-attr": "escape_attr", "escape-string": "escape_string", "raw-html-content": "raw_html_content", "HTML_TAGS": "HTML_TAGS", "VOID_ELEMENTS": "VOID_ELEMENTS", "BOOLEAN_ATTRS": "BOOLEAN_ATTRS", # render.sx core "definition-form?": "is_definition_form", # adapter-html.sx "RENDER_HTML_FORMS": "RENDER_HTML_FORMS", "render-html-form?": "is_render_html_form", "dispatch-html-form": "dispatch_html_form", "render-lambda-html": "render_lambda_html", "make-raw-html": "make_raw_html", "render-html-island": "render_html_island", "serialize-island-state": "serialize_island_state", "json-serialize": "json_serialize", "empty-dict?": "is_empty_dict", "sf-defisland": "sf_defisland", # adapter-sx.sx "render-to-sx": "render_to_sx", "aser": "aser", "eval-case-aser": "eval_case_aser", "sx-serialize": "sx_serialize", "sx-serialize-dict": "sx_serialize_dict", "sx-expr-source": "sx_expr_source", # Primitives that need exact aliases "contains?": "contains_p", "starts-with?": "starts_with_p", "ends-with?": "ends_with_p", "empty?": "empty_p", "every?": "every_p", "for-each": "for_each", "for-each-indexed": "for_each_indexed", "map-indexed": "map_indexed", "map-dict": "map_dict", "eval-cond": "eval_cond", "eval-cond-scheme": "eval_cond_scheme", "eval-cond-clojure": "eval_cond_clojure", "process-bindings": "process_bindings", # deps.sx "scan-refs": "scan_refs", "scan-refs-walk": "scan_refs_walk", "transitive-deps": "transitive_deps", "compute-all-deps": "compute_all_deps", "scan-components-from-source": "scan_components_from_source", "components-needed": "components_needed", "page-component-bundle": "page_component_bundle", "page-css-classes": "page_css_classes", "component-deps": "component_deps", "component-set-deps!": "component_set_deps", "component-css-classes": "component_css_classes", "component-io-refs": "component_io_refs", "component-set-io-refs!": "component_set_io_refs", "env-components": "env_components", "regex-find-all": "regex_find_all", "scan-css-classes": "scan_css_classes", # deps.sx IO detection "scan-io-refs": "scan_io_refs", "scan-io-refs-walk": "scan_io_refs_walk", "transitive-io-refs": "transitive_io_refs", "compute-all-io-refs": "compute_all_io_refs", "component-io-refs-cached": "component_io_refs_cached", "component-pure?": "component_pure_p", "render-target": "render_target", "page-render-plan": "page_render_plan", # router.sx "split-path-segments": "split_path_segments", "make-route-segment": "make_route_segment", "parse-route-pattern": "parse_route_pattern", "match-route-segments": "match_route_segments", "match-route": "match_route", "find-matching-route": "find_matching_route", } if name in RENAMES: return RENAMES[name] # General mangling result = name # Handle trailing ? and ! if result.endswith("?"): result = result[:-1] + "_p" elif result.endswith("!"): result = result[:-1] + "_b" # Kebab to snake_case result = result.replace("-", "_") # Escape Python reserved words if result in _PY_RESERVED: result = result + "_" return result # --- List emission --- def _emit_list(self, expr: list) -> str: if not expr: return "[]" head = expr[0] if not isinstance(head, Symbol): # Data list return "[" + ", ".join(self.emit(x) for x in expr) + "]" name = head.name handler = getattr( self, f"_sf_{name.replace('-', '_').replace('!', '_b').replace('?', '_p')}", None, ) if handler: return handler(expr) # Built-in forms if name in ("fn", "lambda"): return self._emit_fn(expr) if name in ("let", "let*"): return self._emit_let(expr) if name == "if": return self._emit_if(expr) if name == "when": return self._emit_when(expr) if name == "cond": return self._emit_cond(expr) if name == "case": return self._emit_case(expr) if name == "and": return self._emit_and(expr) if name == "or": return self._emit_or(expr) if name == "not": return f"(not sx_truthy({self.emit(expr[1])}))" if name in ("do", "begin"): return self._emit_do(expr) if name == "list": return "[" + ", ".join(self.emit(x) for x in expr[1:]) + "]" if name == "dict": return self._emit_dict_literal(expr) if name == "quote": return self._emit_quote(expr[1]) if name == "set!": # set! in expression context — use nonlocal_cells dict for mutation # from nested lambdas (Python closures can read but not rebind outer vars) varname = expr[1].name if isinstance(expr[1], Symbol) else str(expr[1]) py_var = self._mangle(varname) return f"_sx_cell_set(_cells, {self._py_string(py_var)}, {self.emit(expr[2])})" if name == "str": parts = [self.emit(x) for x in expr[1:]] return "sx_str(" + ", ".join(parts) + ")" # Mutation forms that can appear in expression context if name == "append!": return f"_sx_append({self.emit(expr[1])}, {self.emit(expr[2])})" if name == "dict-set!": return f"_sx_dict_set({self.emit(expr[1])}, {self.emit(expr[2])}, {self.emit(expr[3])})" if name == "env-set!": return f"_sx_dict_set({self.emit(expr[1])}, {self.emit(expr[2])}, {self.emit(expr[3])})" if name == "set-lambda-name!": return f"_sx_set_attr({self.emit(expr[1])}, 'name', {self.emit(expr[2])})" # Infix operators if name in ("+", "-", "*", "/", "=", "!=", "<", ">", "<=", ">=", "mod"): return self._emit_infix(name, expr[1:]) if name == "inc": return f"({self.emit(expr[1])} + 1)" if name == "dec": return f"({self.emit(expr[1])} - 1)" # Regular function call fn_name = self._mangle(name) args = ", ".join(self.emit(x) for x in expr[1:]) return f"{fn_name}({args})" # --- Special form emitters --- def _emit_fn(self, expr) -> str: params = expr[1] body = expr[2:] param_names = [] rest_name = None i = 0 while i < len(params): p = params[i] if isinstance(p, Symbol) and p.name == "&rest": # Next param is the rest parameter if i + 1 < len(params): rest_name = self._mangle(params[i + 1].name if isinstance(params[i + 1], Symbol) else str(params[i + 1])) i += 2 continue else: i += 1 continue if isinstance(p, Symbol): param_names.append(self._mangle(p.name)) else: param_names.append(str(p)) i += 1 if rest_name: param_names.append(f"*{rest_name}") params_str = ", ".join(param_names) if len(body) == 1: body_py = self.emit(body[0]) return f"lambda {params_str}: {body_py}" # Multi-expression body: need a local function lines = [] lines.append(f"_sx_fn(lambda {params_str}: (") for b in body[:-1]: lines.append(f" {self.emit(b)},") lines.append(f" {self.emit(body[-1])}") lines.append(")[-1])") return "\n".join(lines) def _emit_let(self, expr) -> str: bindings = expr[1] body = expr[2:] assignments = [] if isinstance(bindings, list): if bindings and isinstance(bindings[0], list): # Scheme-style: ((name val) ...) for b in bindings: vname = b[0].name if isinstance(b[0], Symbol) else str(b[0]) assignments.append((self._mangle(vname), self.emit(b[1]))) else: # Clojure-style: (name val name val ...) for i in range(0, len(bindings), 2): vname = bindings[i].name if isinstance(bindings[i], Symbol) else str(bindings[i]) assignments.append((self._mangle(vname), self.emit(bindings[i + 1]))) # Nested IIFE for sequential let (each binding can see previous ones): # (lambda a: (lambda b: body)(val_b))(val_a) # Cell variables (mutated by nested set!) are initialized in _cells dict # instead of lambda params, since the body reads _cells[name]. cell_vars = getattr(self, '_current_cell_vars', set()) body_parts = [self.emit(b) for b in body] if len(body) == 1: body_str = body_parts[0] else: body_str = f"_sx_begin({', '.join(body_parts)})" # Build from inside out result = body_str for name, val in reversed(assignments): if name in cell_vars: # Cell var: initialize in _cells dict, not as lambda param result = f"_sx_begin(_sx_cell_set(_cells, {self._py_string(name)}, {val}), {result})" else: result = f"(lambda {name}: {result})({val})" return result def _emit_if(self, expr) -> str: cond = self.emit(expr[1]) then = self.emit(expr[2]) els = self.emit(expr[3]) if len(expr) > 3 else "NIL" return f"({then} if sx_truthy({cond}) else {els})" def _emit_when(self, expr) -> str: cond = self.emit(expr[1]) body_parts = expr[2:] if len(body_parts) == 1: return f"({self.emit(body_parts[0])} if sx_truthy({cond}) else NIL)" body = ", ".join(self.emit(b) for b in body_parts) return f"(_sx_begin({body}) if sx_truthy({cond}) else NIL)" def _emit_when_stmt(self, expr, indent: int = 0) -> str: pad = " " * indent cond = self.emit(expr[1]) body_parts = expr[2:] lines = [f"{pad}if sx_truthy({cond}):"] for b in body_parts: lines.append(self.emit_statement(b, indent + 1)) return "\n".join(lines) def _emit_cond(self, expr) -> str: clauses = expr[1:] if not clauses: return "NIL" is_scheme = ( all(isinstance(c, list) and len(c) == 2 for c in clauses) and not any(isinstance(c, Keyword) for c in clauses) ) if is_scheme: return self._cond_scheme(clauses) return self._cond_clojure(clauses) def _cond_scheme(self, clauses) -> str: if not clauses: return "NIL" clause = clauses[0] test = clause[0] body = clause[1] if isinstance(test, Symbol) and test.name in ("else", ":else"): return self.emit(body) if isinstance(test, Keyword) and test.name == "else": return self.emit(body) return f"({self.emit(body)} if sx_truthy({self.emit(test)}) else {self._cond_scheme(clauses[1:])})" def _cond_clojure(self, clauses) -> str: if len(clauses) < 2: return "NIL" test = clauses[0] body = clauses[1] if isinstance(test, Keyword) and test.name == "else": return self.emit(body) if isinstance(test, Symbol) and test.name in ("else", ":else"): return self.emit(body) return f"({self.emit(body)} if sx_truthy({self.emit(test)}) else {self._cond_clojure(clauses[2:])})" def _emit_case(self, expr) -> str: match_expr = self.emit(expr[1]) clauses = expr[2:] return f"_sx_case({match_expr}, [{self._case_pairs(clauses)}])" def _case_pairs(self, clauses) -> str: pairs = [] i = 0 while i < len(clauses) - 1: test = clauses[i] body = clauses[i + 1] if isinstance(test, Keyword) and test.name == "else": pairs.append(f"(None, lambda: {self.emit(body)})") elif isinstance(test, Symbol) and test.name in ("else", ":else"): pairs.append(f"(None, lambda: {self.emit(body)})") else: pairs.append(f"({self.emit(test)}, lambda: {self.emit(body)})") i += 2 return ", ".join(pairs) def _emit_and(self, expr) -> str: parts = [self.emit(x) for x in expr[1:]] if len(parts) == 1: return parts[0] # Use Python's native and for short-circuit evaluation. # Last value returned as-is; prior values tested with sx_truthy. # (and a b c) -> (a if not sx_truthy(a) else (b if not sx_truthy(b) else c)) result = parts[-1] for p in reversed(parts[:-1]): result = f"({p} if not sx_truthy({p}) else {result})" return result def _emit_or(self, expr) -> str: if len(expr) == 2: return self.emit(expr[1]) parts = [self.emit(x) for x in expr[1:]] # Use Python's short-circuit pattern: # (or a b c) -> (a if sx_truthy(a) else (b if sx_truthy(b) else c)) result = parts[-1] for p in reversed(parts[:-1]): result = f"({p} if sx_truthy({p}) else {result})" return result def _emit_do(self, expr) -> str: return self._emit_do_inner(expr[1:]) def _emit_do_inner(self, exprs) -> str: if len(exprs) == 1: return self.emit(exprs[0]) parts = [self.emit(e) for e in exprs] return "_sx_begin(" + ", ".join(parts) + ")" def _emit_native_dict(self, expr: dict) -> str: """Emit a native Python dict (from parser's {:key val} syntax).""" parts = [] for key, val in expr.items(): parts.append(f"{self._py_string(key)}: {self.emit(val)}") return "{" + ", ".join(parts) + "}" def _emit_dict_literal(self, expr) -> str: pairs = expr[1:] parts = [] i = 0 while i < len(pairs) - 1: key = pairs[i] val = pairs[i + 1] if isinstance(key, Keyword): parts.append(f"{self._py_string(key.name)}: {self.emit(val)}") else: parts.append(f"{self.emit(key)}: {self.emit(val)}") i += 2 return "{" + ", ".join(parts) + "}" def _emit_infix(self, op: str, args: list) -> str: PY_OPS = {"=": "==", "!=": "!=", "mod": "%"} py_op = PY_OPS.get(op, op) if len(args) == 1 and op == "-": return f"(-{self.emit(args[0])})" return f"({self.emit(args[0])} {py_op} {self.emit(args[1])})" def _emit_define(self, expr, indent: int = 0) -> str: pad = " " * indent name = expr[1].name if isinstance(expr[1], Symbol) else str(expr[1]) val_expr = expr[2] # If value is a lambda/fn, check if body uses set! on let-bound vars # and emit as def for proper mutation support if (isinstance(val_expr, list) and val_expr and isinstance(val_expr[0], Symbol) and val_expr[0].name in ("fn", "lambda") and self._body_uses_set(val_expr)): return self._emit_define_as_def(name, val_expr, indent) val = self.emit(val_expr) return f"{pad}{self._mangle(name)} = {val}" def _body_uses_set(self, fn_expr) -> bool: """Check if a fn expression's body (recursively) uses set!.""" def _has_set(node): if not isinstance(node, list) or not node: return False head = node[0] if isinstance(head, Symbol) and head.name == "set!": return True return any(_has_set(child) for child in node if isinstance(child, list)) body = fn_expr[2:] return any(_has_set(b) for b in body) def _emit_define_as_def(self, name: str, fn_expr, indent: int = 0) -> str: """Emit a define with fn value as a proper def statement. This is used for functions that contain set! — Python closures can't rebind outer lambda params, so we need proper def + local variables. Variables mutated by set! from nested lambdas use a _cells dict. """ pad = " " * indent params = fn_expr[1] body = fn_expr[2:] param_names = [] for p in params: if isinstance(p, Symbol): param_names.append(self._mangle(p.name)) else: param_names.append(str(p)) params_str = ", ".join(param_names) py_name = self._mangle(name) # Find set! target variables that are used from nested lambda scopes nested_set_vars = self._find_nested_set_vars(body) lines = [f"{pad}def {py_name}({params_str}):"] if nested_set_vars: lines.append(f"{pad} _cells = {{}}") # Emit body with cell var tracking old_cells = getattr(self, '_current_cell_vars', set()) self._current_cell_vars = nested_set_vars self._emit_body_stmts(body, lines, indent + 1) self._current_cell_vars = old_cells return "\n".join(lines) def _find_nested_set_vars(self, body) -> set[str]: """Find variable names that are set! from within nested fn/lambda bodies.""" result = set() def _scan(node, in_nested_fn=False): if not isinstance(node, list) or not node: return head = node[0] if isinstance(head, Symbol): if head.name in ("fn", "lambda") and in_nested_fn: # Already nested, keep scanning for child in node[2:]: _scan(child, True) return if head.name in ("fn", "lambda"): # Entering nested fn for child in node[2:]: _scan(child, True) return if head.name == "set!" and in_nested_fn: var = node[1].name if isinstance(node[1], Symbol) else str(node[1]) result.add(self._mangle(var)) for child in node: if isinstance(child, list): _scan(child, in_nested_fn) for b in body: _scan(b) return result def _emit_body_stmts(self, body: list, lines: list, indent: int) -> None: """Emit body expressions as statements into lines list. Handles let as local variable declarations, and returns the last expression. """ pad = " " * indent for i, expr in enumerate(body): is_last = (i == len(body) - 1) if isinstance(expr, list) and expr and isinstance(expr[0], Symbol): name = expr[0].name if name in ("let", "let*"): self._emit_let_as_stmts(expr, lines, indent, is_last) continue if name in ("do", "begin"): sub_body = expr[1:] if is_last: self._emit_body_stmts(sub_body, lines, indent) else: for sub in sub_body: lines.append(self.emit_statement(sub, indent)) continue if is_last: lines.append(f"{pad}return {self.emit(expr)}") else: lines.append(self.emit_statement(expr, indent)) def _emit_let_as_stmts(self, expr, lines: list, indent: int, is_last: bool) -> None: """Emit a let expression as local variable declarations.""" pad = " " * indent bindings = expr[1] body = expr[2:] cell_vars = getattr(self, '_current_cell_vars', set()) if isinstance(bindings, list): if bindings and isinstance(bindings[0], list): # Scheme-style: ((name val) ...) for b in bindings: vname = b[0].name if isinstance(b[0], Symbol) else str(b[0]) mangled = self._mangle(vname) if mangled in cell_vars: lines.append(f"{pad}_cells[{self._py_string(mangled)}] = {self.emit(b[1])}") else: lines.append(f"{pad}{mangled} = {self.emit(b[1])}") else: # Clojure-style: (name val name val ...) for j in range(0, len(bindings), 2): vname = bindings[j].name if isinstance(bindings[j], Symbol) else str(bindings[j]) mangled = self._mangle(vname) if mangled in cell_vars: lines.append(f"{pad}_cells[{self._py_string(mangled)}] = {self.emit(bindings[j + 1])}") else: lines.append(f"{pad}{mangled} = {self.emit(bindings[j + 1])}") if is_last: self._emit_body_stmts(body, lines, indent) else: for b in body: self._emit_stmt_recursive(b, lines, indent) def _emit_for_each_stmt(self, expr, indent: int = 0) -> str: pad = " " * indent fn_expr = expr[1] coll_expr = expr[2] coll = self.emit(coll_expr) # If fn is an inline lambda, emit a for loop if isinstance(fn_expr, list) and isinstance(fn_expr[0], Symbol) and fn_expr[0].name == "fn": params = fn_expr[1] body = fn_expr[2:] p = params[0].name if isinstance(params[0], Symbol) else str(params[0]) p_py = self._mangle(p) lines = [f"{pad}for {p_py} in {coll}:"] # Emit body as statements with proper let/set! handling self._emit_loop_body(body, lines, indent + 1) return "\n".join(lines) fn = self.emit(fn_expr) return f"{pad}for _item in {coll}:\n{pad} {fn}(_item)" def _emit_loop_body(self, body: list, lines: list, indent: int) -> None: """Emit loop body as statements. Handles let, when, set!, cond properly.""" pad = " " * indent for expr in body: self._emit_stmt_recursive(expr, lines, indent) def _emit_stmt_recursive(self, expr, lines: list, indent: int) -> None: """Emit an expression as statement(s), recursing into control flow.""" pad = " " * indent if not isinstance(expr, list) or not expr: lines.append(self.emit_statement(expr, indent)) return head = expr[0] if not isinstance(head, Symbol): lines.append(self.emit_statement(expr, indent)) return name = head.name if name == "set!": varname = expr[1].name if isinstance(expr[1], Symbol) else str(expr[1]) mangled = self._mangle(varname) cell_vars = getattr(self, '_current_cell_vars', set()) if mangled in cell_vars: lines.append(f"{pad}_cells[{self._py_string(mangled)}] = {self.emit(expr[2])}") else: lines.append(f"{pad}{mangled} = {self.emit(expr[2])}") elif name in ("let", "let*"): self._emit_let_as_stmts(expr, lines, indent, False) elif name == "when": cond = self.emit(expr[1]) lines.append(f"{pad}if sx_truthy({cond}):") for b in expr[2:]: self._emit_stmt_recursive(b, lines, indent + 1) elif name == "cond": self._emit_cond_stmt(expr, lines, indent) elif name in ("do", "begin"): for b in expr[1:]: self._emit_stmt_recursive(b, lines, indent) elif name == "if": cond = self.emit(expr[1]) lines.append(f"{pad}if sx_truthy({cond}):") self._emit_stmt_recursive(expr[2], lines, indent + 1) if len(expr) > 3: lines.append(f"{pad}else:") self._emit_stmt_recursive(expr[3], lines, indent + 1) elif name == "append!": lines.append(f"{pad}{self.emit(expr[1])}.append({self.emit(expr[2])})") elif name == "dict-set!": lines.append(f"{pad}{self.emit(expr[1])}[{self.emit(expr[2])}] = {self.emit(expr[3])}") elif name == "env-set!": lines.append(f"{pad}{self.emit(expr[1])}[{self.emit(expr[2])}] = {self.emit(expr[3])}") else: lines.append(self.emit_statement(expr, indent)) def _emit_cond_stmt(self, expr, lines: list, indent: int) -> None: """Emit cond as if/elif/else chain.""" pad = " " * indent clauses = expr[1:] # Detect scheme vs clojure style is_scheme = ( all(isinstance(c, list) and len(c) == 2 for c in clauses) and not any(isinstance(c, Keyword) for c in clauses) ) first_clause = True if is_scheme: for clause in clauses: test, body = clause[0], clause[1] if isinstance(test, Symbol) and test.name in ("else", ":else"): lines.append(f"{pad}else:") elif isinstance(test, Keyword) and test.name == "else": lines.append(f"{pad}else:") else: kw = "if" if first_clause else "elif" lines.append(f"{pad}{kw} sx_truthy({self.emit(test)}):") first_clause = False self._emit_stmt_recursive(body, lines, indent + 1) else: i = 0 while i < len(clauses) - 1: test, body = clauses[i], clauses[i + 1] if isinstance(test, Keyword) and test.name == "else": lines.append(f"{pad}else:") elif isinstance(test, Symbol) and test.name in ("else", ":else"): lines.append(f"{pad}else:") else: kw = "if" if first_clause else "elif" lines.append(f"{pad}{kw} sx_truthy({self.emit(test)}):") first_clause = False self._emit_stmt_recursive(body, lines, indent + 1) i += 2 def _emit_quote(self, expr) -> str: """Emit a quoted expression as a Python literal AST.""" if isinstance(expr, bool): return "True" if expr else "False" if isinstance(expr, (int, float)): return str(expr) if isinstance(expr, str): return self._py_string(expr) if expr is None or expr is SX_NIL: return "NIL" if isinstance(expr, Symbol): return f"Symbol({self._py_string(expr.name)})" if isinstance(expr, Keyword): return f"Keyword({self._py_string(expr.name)})" if isinstance(expr, list): return "[" + ", ".join(self._emit_quote(x) for x in expr) + "]" return str(expr) def _py_string(self, s: str) -> str: return repr(s) # --------------------------------------------------------------------------- # Bootstrap compiler # --------------------------------------------------------------------------- def extract_defines(source: str) -> list[tuple[str, list]]: """Parse .sx source, return list of (name, define-expr) for top-level defines.""" exprs = parse_all(source) defines = [] for expr in exprs: if isinstance(expr, list) and expr and isinstance(expr[0], Symbol): if expr[0].name == "define": name = expr[1].name if isinstance(expr[1], Symbol) else str(expr[1]) defines.append((name, expr)) return defines # Build config and static platform sections — canonical source is platform_py.py try: from .platform_py import ( PREAMBLE, PLATFORM_PY, PRIMITIVES_PY_PRE, PRIMITIVES_PY_POST, PRIMITIVES_PY_MODULES, _ALL_PY_MODULES, PLATFORM_DEPS_PY, PLATFORM_ASYNC_PY, FIXUPS_PY, CONTINUATIONS_PY, _assemble_primitives_py, public_api_py, ADAPTER_FILES, SPEC_MODULES, EXTENSION_NAMES, EXTENSION_FORMS, ) except ImportError: from shared.sx.ref.platform_py import ( PREAMBLE, PLATFORM_PY, PRIMITIVES_PY_PRE, PRIMITIVES_PY_POST, PRIMITIVES_PY_MODULES, _ALL_PY_MODULES, PLATFORM_DEPS_PY, PLATFORM_ASYNC_PY, FIXUPS_PY, CONTINUATIONS_PY, _assemble_primitives_py, public_api_py, ADAPTER_FILES, SPEC_MODULES, EXTENSION_NAMES, EXTENSION_FORMS, ) def _parse_special_forms_spec(ref_dir: str) -> set[str]: """Parse special-forms.sx to extract declared form names.""" filepath = os.path.join(ref_dir, "special-forms.sx") if not os.path.exists(filepath): return set() with open(filepath) as f: src = f.read() names = set() for expr in parse_all(src): if (isinstance(expr, list) and len(expr) >= 2 and isinstance(expr[0], Symbol) and expr[0].name == "define-special-form" and isinstance(expr[1], str)): names.add(expr[1]) return names def _extract_eval_dispatch_names(all_sections: list) -> set[str]: """Extract special form names dispatched in eval-list from transpiled sections.""" names = set() for _label, defines in all_sections: for name, _expr in defines: if name.startswith("sf-"): form = name[3:] if form in ("cond-scheme", "cond-clojure", "case-loop"): continue names.add(form) if name.startswith("ho-"): form = name[3:] names.add(form) return names def _validate_special_forms(ref_dir: str, all_sections: list, has_continuations: bool) -> None: """Cross-check special-forms.sx against eval.sx dispatch. Warn on mismatches.""" spec_names = _parse_special_forms_spec(ref_dir) if not spec_names: return dispatch_names = _extract_eval_dispatch_names(all_sections) if has_continuations: dispatch_names |= EXTENSION_FORMS["continuations"] name_aliases = { "thread-first": "->", "every": "every?", "set-bang": "set!", } normalized_dispatch = set() for n in dispatch_names: normalized_dispatch.add(name_aliases.get(n, n)) internal = {"named-let"} normalized_dispatch -= internal undispatched = spec_names - normalized_dispatch ignore = {"fn", "let*", "do", "defrelation"} undispatched -= ignore unspecced = normalized_dispatch - spec_names unspecced -= ignore if undispatched: import sys print(f"# WARNING: special-forms.sx declares forms not in eval.sx: " f"{', '.join(sorted(undispatched))}", file=sys.stderr) if unspecced: import sys print(f"# WARNING: eval.sx dispatches forms not in special-forms.sx: " f"{', '.join(sorted(unspecced))}", file=sys.stderr) def compile_ref_to_py( adapters: list[str] | None = None, modules: list[str] | None = None, extensions: list[str] | None = None, spec_modules: list[str] | None = None, ) -> str: """Read reference .sx files and emit Python. Args: adapters: List of adapter names to include. Valid names: html, sx. None = include all server-side adapters. modules: List of primitive module names to include. core.* are always included. stdlib.* are opt-in. None = include all modules (backward compatible). extensions: List of optional extensions to include. Valid names: continuations. None = no extensions. spec_modules: List of spec module names to include. Valid names: deps, engine. None = no spec modules. """ # Determine which primitive modules to include prim_modules = None # None = all if modules is not None: prim_modules = [m for m in _ALL_PY_MODULES if m.startswith("core.")] for m in modules: if m not in prim_modules: if m not in PRIMITIVES_PY_MODULES: raise ValueError(f"Unknown module: {m!r}. Valid: {', '.join(PRIMITIVES_PY_MODULES)}") prim_modules.append(m) ref_dir = os.path.dirname(os.path.abspath(__file__)) emitter = PyEmitter() # Resolve adapter set if adapters is None: adapter_set = set(ADAPTER_FILES.keys()) else: adapter_set = set() for a in adapters: if a not in ADAPTER_FILES: raise ValueError(f"Unknown adapter: {a!r}. Valid: {', '.join(ADAPTER_FILES)}") adapter_set.add(a) # Resolve spec modules spec_mod_set = set() if spec_modules: for sm in spec_modules: if sm not in SPEC_MODULES: raise ValueError(f"Unknown spec module: {sm!r}. Valid: {', '.join(SPEC_MODULES)}") spec_mod_set.add(sm) # html adapter needs deps (component analysis) and signals (island rendering) if "html" in adapter_set: if "deps" in SPEC_MODULES: spec_mod_set.add("deps") if "signals" in SPEC_MODULES: spec_mod_set.add("signals") has_deps = "deps" in spec_mod_set # Core files always included, then selected adapters, then spec modules sx_files = [ ("eval.sx", "eval"), ("forms.sx", "forms (server definition forms)"), ("render.sx", "render (core)"), ] for name in ("html", "sx"): if name in adapter_set: sx_files.append(ADAPTER_FILES[name]) for name in sorted(spec_mod_set): sx_files.append(SPEC_MODULES[name]) all_sections = [] for filename, label in sx_files: filepath = os.path.join(ref_dir, filename) if not os.path.exists(filepath): continue with open(filepath) as f: src = f.read() defines = extract_defines(src) all_sections.append((label, defines)) # Resolve extensions ext_set = set() if extensions: for e in extensions: if e not in EXTENSION_NAMES: raise ValueError(f"Unknown extension: {e!r}. Valid: {', '.join(EXTENSION_NAMES)}") ext_set.add(e) has_continuations = "continuations" in ext_set # Validate special forms _validate_special_forms(ref_dir, all_sections, has_continuations) # Build output has_html = "html" in adapter_set has_sx = "sx" in adapter_set parts = [] parts.append(PREAMBLE) parts.append(PLATFORM_PY) parts.append(PRIMITIVES_PY_PRE) parts.append(_assemble_primitives_py(prim_modules)) parts.append(PRIMITIVES_PY_POST) if has_deps: parts.append(PLATFORM_DEPS_PY) for label, defines in all_sections: parts.append(f"\n# === Transpiled from {label} ===\n") for name, expr in defines: parts.append(f"# {name}") parts.append(emitter.emit_statement(expr)) parts.append("") parts.append(FIXUPS_PY) if has_continuations: parts.append(CONTINUATIONS_PY) parts.append(public_api_py(has_html, has_sx, has_deps)) return "\n".join(parts) # NOTE: Static platform sections (PREAMBLE, PLATFORM_PY, PRIMITIVES_*, etc.) # are now imported from platform_py.py above. Do not redefine them here. # --------------------------------------------------------------------------- # Main # --------------------------------------------------------------------------- def main(): import argparse parser = argparse.ArgumentParser(description="Bootstrap SX spec -> Python") parser.add_argument( "--adapters", default=None, help="Comma-separated adapter names (html,sx). Default: all server-side.", ) parser.add_argument( "--modules", default=None, help="Comma-separated primitive modules (core.* always included). Default: all.", ) parser.add_argument( "--extensions", default=None, help="Comma-separated extensions (continuations). Default: none.", ) parser.add_argument( "--spec-modules", default=None, help="Comma-separated spec modules (deps,engine). Default: none.", ) args = parser.parse_args() adapters = args.adapters.split(",") if args.adapters else None modules = args.modules.split(",") if args.modules else None extensions = args.extensions.split(",") if args.extensions else None spec_modules = args.spec_modules.split(",") if args.spec_modules else None print(compile_ref_to_py(adapters, modules, extensions, spec_modules)) if __name__ == "__main__": main()