Add TCO for parser loops in JS bootstrapper, enable SX_USE_REF

The JS parser transpiled from parser.sx used tail-recursive functions
(readStrLoop, skipWs, readListLoop, etc.) which overflow the stack on
large inputs — the bootstrapper page highlights 100KB of Python and
143KB of JavaScript, producing 7620 spans in a 907KB response.

The bootstrapper now detects zero-arg self-tail-recursive functions and
emits them as while(true) loops with continue instead of recursive
calls. Tested with 150K char strings and 8000 sibling elements.

Also enables SX_USE_REF=1 in dev via x-dev-env anchor.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-05 23:09:02 +00:00
parent 38f1f82988
commit 54adc9c216
4 changed files with 455 additions and 2238 deletions

View File

@@ -717,9 +717,211 @@ class JSEmitter:
def _emit_define(self, expr) -> str:
    """Emit a ``(define name value)`` form as a JavaScript ``var`` declaration.

    When the value is a zero-parameter ``fn``/``lambda`` whose tail expression
    calls ``name``, the function is emitted as
    ``function() { while(true) { ... } }`` so the self-call is lowered to
    ``continue`` rather than a recursive call.  Any other value is emitted
    once through ``self.emit``; a define without a value yields ``NIL``.

    Args:
        expr: The define form, ``[define, name]`` or ``[define, name, value]``.

    Returns:
        A single JavaScript statement declaring the mangled name.
    """
    name = expr[1].name if isinstance(expr[1], Symbol) else str(expr[1])
    # The value is optional, so it is only looked at behind this length guard
    # and is emitted exactly once (after the tail-recursion check below).
    fn_expr = expr[2] if len(expr) > 2 else None
    # Detect zero-arg self-tail-recursive functions and emit as while loops
    is_zero_arg_fn = (
        isinstance(fn_expr, list)
        and len(fn_expr) >= 2
        and isinstance(fn_expr[0], Symbol)
        and fn_expr[0].name in ("fn", "lambda")
        and isinstance(fn_expr[1], list)
        and not fn_expr[1]
    )
    if is_zero_arg_fn and self._is_self_tail_recursive(name, fn_expr[2:]):
        loop_body = self._emit_loop_body(name, fn_expr[2:])
        return f"var {self._mangle(name)} = function() {{ while(true) {{ {loop_body} }} }};"
    # Test against None, not truthiness: falsy literals such as 0, False or ""
    # are real values and must be emitted instead of being replaced by NIL.
    val = self.emit(fn_expr) if fn_expr is not None else "NIL"
    return f"var {self._mangle(name)} = {val};"
def _is_self_tail_recursive(self, name: str, body: list) -> bool:
    """Report whether the final form of ``body`` tail-calls ``name``.

    Only the last form is inspected (through ``_has_tail_call``); an empty
    body is never treated as tail-recursive.
    """
    return self._has_tail_call(name, body[-1]) if body else False
def _has_tail_call(self, name: str, expr) -> bool:
    """Return True if ``expr`` calls ``name`` in tail position on some path.

    Recognised tail contexts: both branches of ``if``, the last body form of
    ``when``, clause bodies of ``cond``, and the last form of ``do``/``begin``
    and ``let``/``let*``.  Anything else (atoms, empty lists, lists whose head
    is not a Symbol, calls to other functions) is not a tail call.

    Args:
        name: Name of the function being analysed.
        expr: The expression sitting in tail position.

    Returns:
        True when at least one tail path is a direct call to ``name``.
    """
    if not isinstance(expr, list) or not expr:
        return False
    head = expr[0]
    if not isinstance(head, Symbol):
        return False
    h = head.name
    # Direct tail call
    if h == name:
        return True
    # Branching forms — check if any branch tail-calls
    if h == "if":
        # expr[2:4] is (then, else), (then,) or empty, so a malformed `if`
        # with missing branches is simply "no tail call" instead of an error.
        return any(self._has_tail_call(name, branch) for branch in expr[2:4])
    if h == "when":
        # Only the last body form is in tail position; earlier forms are
        # evaluated for effect and are emitted as ordinary statements.
        return len(expr) > 2 and self._has_tail_call(name, expr[-1])
    if h == "cond":
        for clause in expr[1:]:
            if isinstance(clause, Keyword):
                continue
            # A two-element list is treated as a (test body) pair and only its
            # body is inspected; every other clause is inspected as a whole.
            is_pair = isinstance(clause, list) and len(clause) == 2
            if self._has_tail_call(name, clause[1] if is_pair else clause):
                return True
        return False
    if h in ("do", "begin"):
        return self._has_tail_call(name, expr[-1]) if len(expr) > 1 else False
    if h in ("let", "let*"):
        return self._has_tail_call(name, expr[-1]) if len(expr) > 2 else False
    return False
def _emit_loop_body(self, name: str, body: list) -> str:
    """Render a function body as the statements inside a ``while(true)`` loop.

    Every form except the last is emitted with ``emit_statement``; the last
    form goes through ``_emit_tail_as_stmt`` so that self-calls become
    ``continue`` and all other exits become ``return``.  An empty body
    renders as ``return NIL;``.  Statements are joined with newlines.
    """
    if not body:
        return "return NIL;"
    *leading, tail = body
    # Leading forms run only for their side effects, in source order.
    statements = [self.emit_statement(form) for form in leading]
    statements.append(self._emit_tail_as_stmt(name, tail))
    return "\n".join(statements)
def _emit_tail_as_stmt(self, name: str, expr) -> str:
    """Emit an expression in tail position as statements for a loop body.

    A direct call to ``name`` becomes ``continue;``.  ``do``/``begin``, ``if``,
    ``when``, ``cond`` and ``let``/``let*`` are unfolded so their own tail
    positions are handled recursively.  Everything else becomes
    ``return <expr>;``.

    Every path of the generated code ends in ``return`` or ``continue``; a
    path that merely fell through would re-run the enclosing ``while(true)``
    body.  Forms with no body therefore return ``NIL`` explicitly.

    Args:
        name: Name of the function being turned into a loop.
        expr: The expression in tail position.

    Returns:
        JavaScript statement text.
    """
    if not isinstance(expr, list) or not expr:
        return f"return {self.emit(expr)};"
    head = expr[0]
    if not isinstance(head, Symbol):
        return f"return {self.emit(expr)};"
    h = head.name
    # Direct tail call to self → continue
    if h == name:
        return "continue;"
    # (do stmt1 stmt2 ... tail) → emit stmts then recurse on tail
    if h in ("do", "begin"):
        forms = expr[1:]
        if not forms:
            # A bare (do) has no tail form; without this guard expr[-1] would
            # be the `do` symbol itself.
            return "return NIL;"
        stmts = [self.emit_statement(e) for e in forms[:-1]]
        stmts.append(self._emit_tail_as_stmt(name, forms[-1]))
        return "\n".join(stmts)
    # (if cond then else) → if/else with tail handling in each branch
    if h == "if":
        cond = self.emit(expr[1])
        then_branch = self._emit_tail_as_stmt(name, expr[2]) if len(expr) > 2 else "return NIL;"
        else_branch = self._emit_tail_as_stmt(name, expr[3]) if len(expr) > 3 else "return NIL;"
        return f"if (isSxTruthy({cond})) {{ {then_branch} }} else {{ {else_branch} }}"
    # (when cond body...) → if (cond) { body... } else { return NIL; }
    if h == "when":
        cond = self.emit(expr[1])
        body_parts = expr[2:]
        if not body_parts:
            # The condition is still evaluated, but both outcomes must leave
            # the loop: an empty `{}` branch would fall through and iterate
            # again forever while the condition stays truthy.
            return f"if (isSxTruthy({cond})) {{ return NIL; }} else {{ return NIL; }}"
        stmts = [self.emit_statement(e) for e in body_parts[:-1]]
        stmts.append(self._emit_tail_as_stmt(name, body_parts[-1]))
        inner = "\n".join(stmts)
        return f"if (isSxTruthy({cond})) {{ {inner} }} else {{ return NIL; }}"
    # (cond clause1 clause2 ...) → if/else if/else chain
    if h == "cond":
        return self._emit_cond_as_loop_stmt(name, expr[1:])
    # (let ((bindings)) body...) → { var ...; tail }
    if h in ("let", "let*"):
        bindings = expr[1]
        body = expr[2:]
        parts = []
        if isinstance(bindings, list):
            if bindings and isinstance(bindings[0], list):
                # Paired bindings: ((a 1) (b 2))
                for b in bindings:
                    vname = b[0].name if isinstance(b[0], Symbol) else str(b[0])
                    parts.append(f"var {self._mangle(vname)} = {self.emit(b[1])};")
            else:
                # Flat bindings: (a 1 b 2)
                for i in range(0, len(bindings), 2):
                    vname = bindings[i].name if isinstance(bindings[i], Symbol) else str(bindings[i])
                    parts.append(f"var {self._mangle(vname)} = {self.emit(bindings[i + 1])};")
        for b_expr in body[:-1]:
            parts.append(self.emit_statement(b_expr))
        # A let without body forms has no tail expression; return NIL rather
        # than indexing into an empty body.
        parts.append(self._emit_tail_as_stmt(name, body[-1]) if body else "return NIL;")
        inner = "\n".join(parts)
        return f"{{ {inner} }}"
    # Not a tail call to self — regular return
    return f"return {self.emit(expr)};"
def _emit_cond_as_loop_stmt(self, name: str, clauses) -> str:
    """Render ``cond`` clauses as an if / else-if / else chain for a loop body.

    With no clauses the result is ``return NIL;``.  Otherwise the clause
    layout decides which helper does the work: when every clause is a
    two-element list (and none is a Keyword) the Scheme-style helper is used,
    in all other cases the flat Clojure-style helper.
    """
    if not clauses:
        return "return NIL;"
    # Style detection is intended to match the one used by _emit_cond.
    every_clause_is_pair = all(
        isinstance(clause, list) and len(clause) == 2 for clause in clauses
    )
    if every_clause_is_pair and not any(isinstance(clause, Keyword) for clause in clauses):
        return self._cond_scheme_loop(name, clauses)
    return self._cond_clojure_loop(name, clauses)
def _cond_scheme_loop(self, name: str, clauses) -> str:
    """Emit Scheme-style ``cond`` clauses, ``((test body) ...)``, for a loop body.

    Each clause becomes an ``if`` / ``else if`` branch whose body is emitted
    in tail position.  A clause whose test is ``:else``, ``else`` or ``true``
    ends the chain as the final ``else`` branch.  Without such a clause the
    chain ends with ``else { return NIL; }``.

    Args:
        name: Name of the function being turned into a loop.
        clauses: Sequence of two-element ``[test, body]`` lists.

    Returns:
        JavaScript statement text; ``return NIL;`` when there are no clauses.
    """
    parts = []
    for clause in clauses:
        cond_expr, body_expr = clause[0], clause[1]
        # Check for :else / else / literal true
        is_else = (
            (isinstance(cond_expr, (Keyword, Symbol)) and cond_expr.name == "else")
            or cond_expr is True
        )
        if is_else:
            body = self._emit_tail_as_stmt(name, body_expr)
            # Attach the catch-all to the chain with an explicit `else` so it
            # can never run after a branch that was already taken.  When it is
            # the very first clause there is no `if` to attach to, so it is
            # emitted as a plain block.
            parts.append(f"else {{ {body} }}" if parts else f"{{ {body} }}")
            break
        prefix = "else if" if parts else "if"
        cond = self.emit(cond_expr)
        body = self._emit_tail_as_stmt(name, body_expr)
        parts.append(f"{prefix} (isSxTruthy({cond})) {{ {body} }}")
    else:
        # No catch-all clause was seen.  A dangling `else` is only valid after
        # at least one `if`, so an empty clause list returns NIL directly.
        parts.append("else { return NIL; }" if parts else "return NIL;")
    return " ".join(parts)
def _cond_clojure_loop(self, name: str, clauses) -> str:
    """Emit flat Clojure-style ``cond`` clauses, ``(test body test body ...)``.

    Clauses are consumed two at a time as test/body pairs and become an
    ``if`` / ``else if`` chain with bodies emitted in tail position.  An
    ``:else`` keyword followed by a body, or a single trailing clause with no
    partner, becomes the final ``else`` branch.  Without one the chain ends
    with ``else { return NIL; }``.

    Args:
        name: Name of the function being turned into a loop.
        clauses: Flat sequence of alternating tests and bodies.

    Returns:
        JavaScript statement text; ``return NIL;`` when there are no clauses.
    """
    parts = []
    has_else = False

    def final_branch(body_expr) -> str:
        # `else` is only valid after an `if`; when the catch-all is the first
        # thing emitted it is rendered as a plain block instead.
        body = self._emit_tail_as_stmt(name, body_expr)
        return f"else {{ {body} }}" if parts else f"{{ {body} }}"

    i = 0
    while i < len(clauses):
        c = clauses[i]
        if isinstance(c, Keyword) and c.name == "else":
            # A trailing :else with no body contributes nothing; the default
            # NIL branch below closes the chain instead.
            if i + 1 < len(clauses):
                parts.append(final_branch(clauses[i + 1]))
                has_else = True
            break
        if i + 1 < len(clauses):
            prefix = "else if" if parts else "if"
            cond = self.emit(c)
            body = self._emit_tail_as_stmt(name, clauses[i + 1])
            parts.append(f"{prefix} (isSxTruthy({cond})) {{ {body} }}")
            i += 2
        else:
            # Odd trailing clause: treated as the default body.
            parts.append(final_branch(c))
            has_else = True
            i += 1
    if not has_else:
        parts.append("else { return NIL; }" if parts else "return NIL;")
    return " ".join(parts)
def _emit_for_each_stmt(self, expr) -> str:
fn_expr = expr[1]
coll_expr = expr[2]
@@ -774,6 +976,7 @@ def extract_defines(source: str) -> list[tuple[str, list]]:
ADAPTER_FILES = {
"parser": ("parser.sx", "parser"),
"html": ("adapter-html.sx", "adapter-html"),
"sx": ("adapter-sx.sx", "adapter-sx"),
"dom": ("adapter-dom.sx", "adapter-dom"),
@@ -788,7 +991,8 @@ ADAPTER_DEPS = {
"engine": ["dom"],
"orchestration": ["engine", "dom"],
"cssx": [],
"boot": ["dom", "engine", "orchestration", "cssx"],
"boot": ["dom", "engine", "orchestration", "cssx", "parser"],
"parser": [],
}
@@ -805,6 +1009,7 @@ def compile_ref_to_js(adapters: list[str] | None = None) -> str:
# Platform JS blocks keyed by adapter name
adapter_platform = {
"parser": PLATFORM_PARSER_JS,
"dom": PLATFORM_DOM_JS,
"engine": PLATFORM_ENGINE_PURE_JS,
"orchestration": PLATFORM_ORCHESTRATION_JS,
@@ -830,7 +1035,7 @@ def compile_ref_to_js(adapters: list[str] | None = None) -> str:
("eval.sx", "eval"),
("render.sx", "render (core)"),
]
for name in ("html", "sx", "dom", "engine", "orchestration", "cssx", "boot"):
for name in ("parser", "html", "sx", "dom", "engine", "orchestration", "cssx", "boot"):
if name in adapter_set:
sx_files.append(ADAPTER_FILES[name])
@@ -852,11 +1057,17 @@ def compile_ref_to_js(adapters: list[str] | None = None) -> str:
has_orch = "orchestration" in adapter_set
has_cssx = "cssx" in adapter_set
has_boot = "boot" in adapter_set
has_parser = "parser" in adapter_set
adapter_label = "+".join(sorted(adapter_set)) if adapter_set else "core-only"
parts = []
parts.append(PREAMBLE)
parts.append(PLATFORM_JS)
# Parser platform must come before compiled parser.sx
if has_parser:
parts.append(adapter_platform["parser"])
for label, defines in all_sections:
parts.append(f"\n // === Transpiled from {label} ===\n")
for name, expr in defines:
@@ -872,7 +1083,7 @@ def compile_ref_to_js(adapters: list[str] | None = None) -> str:
parts.append(adapter_platform[name])
parts.append(fixups_js(has_html, has_sx, has_dom))
parts.append(public_api_js(has_html, has_sx, has_dom, has_engine, has_orch, has_cssx, has_boot, adapter_label))
parts.append(public_api_js(has_html, has_sx, has_dom, has_engine, has_orch, has_cssx, has_boot, has_parser, adapter_label))
parts.append(EPILOGUE)
return "\n".join(parts)
@@ -1393,6 +1604,26 @@ PLATFORM_JS = '''
return NIL;
}'''
# JavaScript helper functions for the "parser" adapter, kept as a raw string so
# the regex and string-escape backslashes reach the generated file unchanged.
# Provides identifier character tests (isIdentStart / isIdentChar), number
# parsing, string escaping and sxExprSource.
# NOTE(review): presumably these are the platform primitives the transpiled
# parser.sx calls — confirm against parser.sx.
PLATFORM_PARSER_JS = r"""
// =========================================================================
// Platform interface — Parser
// =========================================================================
// Character classification derived from the grammar:
// ident-start → [a-zA-Z_~*+\-><=/!?&]
// ident-char → ident-start + [0-9.:\/\[\]#,]
var _identStartRe = /[a-zA-Z_~*+\-><=/!?&]/;
var _identCharRe = /[a-zA-Z0-9_~*+\-><=/!?.:&/\[\]#,]/;
function isIdentStart(ch) { return _identStartRe.test(ch); }
function isIdentChar(ch) { return _identCharRe.test(ch); }
function parseNumber(s) { return Number(s); }
function escapeString(s) {
return s.replace(/\\/g, "\\\\").replace(/"/g, '\\"').replace(/\n/g, "\\n").replace(/\t/g, "\\t");
}
function sxExprSource(e) { return typeof e === "string" ? e : String(e); }
"""
PLATFORM_DOM_JS = """
// =========================================================================
// Platform interface — DOM adapter (browser-only)
@@ -2203,18 +2434,11 @@ PLATFORM_CSSX_JS = """
if (!cssTarget) return;
var rules = [];
// Child-selector atoms are now routed to pseudoRules by the resolver
// with selector ">:not(:first-child)", so base declarations are always
// applied directly to the class.
if (sv.declarations) {
var hasChild = false;
if (atoms) {
for (var ai = 0; ai < atoms.length; ai++) {
if (isChildSelectorAtom(atoms[ai])) { hasChild = true; break; }
}
}
if (hasChild) {
rules.push("." + sv.className + ">:not(:first-child){" + sv.declarations + "}");
} else {
rules.push("." + sv.className + "{" + sv.declarations + "}");
}
rules.push("." + sv.className + "{" + sv.declarations + "}");
}
for (var pi = 0; pi < sv.pseudoRules.length; pi++) {
var sel = sv.pseudoRules[pi][0], decls = sv.pseudoRules[pi][1];
@@ -2426,109 +2650,17 @@ def fixups_js(has_html, has_sx, has_dom):
return "\n".join(lines)
def public_api_js(has_html, has_sx, has_dom, has_engine, has_orch, has_cssx, has_boot, adapter_label):
# Parser is always included
parser = r'''
// =========================================================================
// Parser
// =========================================================================
def public_api_js(has_html, has_sx, has_dom, has_engine, has_orch, has_cssx, has_boot, has_parser, adapter_label):
# Parser: use compiled sxParse from parser.sx, or inline a minimal fallback
if has_parser:
parser = '''
// Parser — compiled from parser.sx (see PLATFORM_PARSER_JS for ident char classes)
var parse = sxParse;'''
else:
parser = r'''
// Minimal fallback parser (no parser adapter)
function parse(text) {
var pos = 0;
function skipWs() {
while (pos < text.length) {
var ch = text[pos];
if (ch === " " || ch === "\t" || ch === "\n" || ch === "\r") { pos++; continue; }
if (ch === ";") { while (pos < text.length && text[pos] !== "\n") pos++; continue; }
break;
}
}
function readExpr() {
skipWs();
if (pos >= text.length) return undefined;
var ch = text[pos];
if (ch === "(") { pos++; return readList(")"); }
if (ch === "[") { pos++; return readList("]"); }
if (ch === "{") { pos++; return readMap(); }
if (ch === '"') return readString();
if (ch === ":") return readKeyword();
if (ch === "`") { pos++; return [new Symbol("quasiquote"), readExpr()]; }
if (ch === ",") {
pos++;
if (pos < text.length && text[pos] === "@") { pos++; return [new Symbol("splice-unquote"), readExpr()]; }
return [new Symbol("unquote"), readExpr()];
}
if (ch === "-" && pos + 1 < text.length && text[pos + 1] >= "0" && text[pos + 1] <= "9") return readNumber();
if (ch >= "0" && ch <= "9") return readNumber();
return readSymbol();
}
function readList(close) {
var items = [];
while (true) {
skipWs();
if (pos >= text.length) throw new Error("Unterminated list");
if (text[pos] === close) { pos++; return items; }
items.push(readExpr());
}
}
function readMap() {
var result = {};
while (true) {
skipWs();
if (pos >= text.length) throw new Error("Unterminated map");
if (text[pos] === "}") { pos++; return result; }
var key = readExpr();
var keyStr = (key && key._kw) ? key.name : String(key);
result[keyStr] = readExpr();
}
}
function readString() {
pos++; // skip "
var s = "";
while (pos < text.length) {
var ch = text[pos];
if (ch === '"') { pos++; return s; }
if (ch === "\\") { pos++; var esc = text[pos]; s += esc === "n" ? "\n" : esc === "t" ? "\t" : esc === "r" ? "\r" : esc; pos++; continue; }
s += ch; pos++;
}
throw new Error("Unterminated string");
}
function readKeyword() {
pos++; // skip :
var name = readIdent();
return new Keyword(name);
}
function readNumber() {
var start = pos;
if (text[pos] === "-") pos++;
while (pos < text.length && text[pos] >= "0" && text[pos] <= "9") pos++;
if (pos < text.length && text[pos] === ".") { pos++; while (pos < text.length && text[pos] >= "0" && text[pos] <= "9") pos++; }
if (pos < text.length && (text[pos] === "e" || text[pos] === "E")) {
pos++;
if (pos < text.length && (text[pos] === "+" || text[pos] === "-")) pos++;
while (pos < text.length && text[pos] >= "0" && text[pos] <= "9") pos++;
}
return Number(text.slice(start, pos));
}
function readIdent() {
var start = pos;
while (pos < text.length && /[a-zA-Z0-9_~*+\-><=/!?.:&]/.test(text[pos])) pos++;
return text.slice(start, pos);
}
function readSymbol() {
var name = readIdent();
if (name === "true") return true;
if (name === "false") return false;
if (name === "nil") return NIL;
return new Symbol(name);
}
var exprs = [];
while (true) {
skipWs();
if (pos >= text.length) break;
exprs.push(readExpr());
}
return exprs;
throw new Error("Parser adapter not included — cannot parse SX source at runtime");
}'''
# Public API — conditional on adapters