Add TCO for parser loops in JS bootstrapper, enable SX_USE_REF
The JS parser transpiled from parser.sx used tail-recursive functions (readStrLoop, skipWs, readListLoop, etc.) which overflow the stack on large inputs — the bootstrapper page highlights 100KB of Python and 143KB of JavaScript, producing 7620 spans in a 907KB response. The bootstrapper now detects zero-arg self-tail-recursive functions and emits them as while(true) loops with continue instead of recursive calls. Tested with 150K char strings and 8000 sibling elements. Also enables SX_USE_REF=1 in dev via x-dev-env anchor. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -717,9 +717,211 @@ class JSEmitter:
|
||||
|
||||
def _emit_define(self, expr) -> str:
    """Emit a JS ``var`` declaration for a ``(define name value)`` form.

    When the value is a zero-parameter ``fn``/``lambda`` whose body tail-calls
    ``name`` (as reported by ``_is_self_tail_recursive``), the function is
    emitted as ``function() { while(true) { ... } }`` with the loop body
    produced by ``_emit_loop_body``.  Every other value goes through
    ``self.emit``.

    Args:
        expr: The define form.  ``expr[1]`` is the name (a ``Symbol``, or
            anything ``str()`` accepts) and ``expr[2]``, when present, is the
            value expression.

    Returns:
        One ``var <mangled name> = ...;`` statement.  A define that carries no
        value binds ``NIL``.
    """
    name = expr[1].name if isinstance(expr[1], Symbol) else str(expr[1])
    has_value = len(expr) > 2
    fn_expr = expr[2] if has_value else None

    # Detect zero-arg self-tail-recursive functions and emit as while loops.
    # The length check keeps a malformed ``(fn)`` from raising IndexError on
    # the parameter-list lookup.
    is_zero_arg_fn = (
        isinstance(fn_expr, list)
        and len(fn_expr) >= 2
        and isinstance(fn_expr[0], Symbol)
        and fn_expr[0].name in ("fn", "lambda")
        and isinstance(fn_expr[1], list)
        and not fn_expr[1]
    )
    if is_zero_arg_fn:
        body = fn_expr[2:]
        if self._is_self_tail_recursive(name, body):
            loop_body = self._emit_loop_body(name, body)
            return f"var {self._mangle(name)} = function() {{ while(true) {{ {loop_body} }} }};"

    # Decide on presence, not truthiness: falsy values such as 0, False or ""
    # are real values and must not be replaced by NIL.  The value is emitted
    # exactly once, and only after the length guard above.
    val = self.emit(fn_expr) if has_value else "NIL"
    return f"var {self._mangle(name)} = {val};"
|
||||
|
||||
def _is_self_tail_recursive(self, name: str, body: list) -> bool:
|
||||
"""Check if a function body contains tail calls to itself."""
|
||||
if not body:
|
||||
return False
|
||||
last = body[-1]
|
||||
return self._has_tail_call(name, last)
|
||||
|
||||
def _has_tail_call(self, name: str, expr) -> bool:
|
||||
"""Check if expr has a tail call to name in any branch."""
|
||||
if not isinstance(expr, list) or not expr:
|
||||
return False
|
||||
head = expr[0]
|
||||
if not isinstance(head, Symbol):
|
||||
return False
|
||||
h = head.name
|
||||
# Direct tail call
|
||||
if h == name:
|
||||
return True
|
||||
# Branching forms — check if any branch tail-calls
|
||||
if h == "if":
|
||||
return (self._has_tail_call(name, expr[2])
|
||||
or (len(expr) > 3 and self._has_tail_call(name, expr[3])))
|
||||
if h == "when":
|
||||
return any(self._has_tail_call(name, e) for e in expr[2:])
|
||||
if h == "cond":
|
||||
for clause in expr[1:]:
|
||||
if isinstance(clause, list) and len(clause) == 2:
|
||||
if self._has_tail_call(name, clause[1]):
|
||||
return True
|
||||
elif isinstance(clause, Keyword):
|
||||
continue
|
||||
elif isinstance(clause, list):
|
||||
if self._has_tail_call(name, clause):
|
||||
return True
|
||||
else:
|
||||
if self._has_tail_call(name, clause):
|
||||
return True
|
||||
return False
|
||||
if h in ("do", "begin"):
|
||||
return self._has_tail_call(name, expr[-1]) if len(expr) > 1 else False
|
||||
if h == "let" or h == "let*":
|
||||
return self._has_tail_call(name, expr[-1]) if len(expr) > 2 else False
|
||||
return False
|
||||
|
||||
def _emit_loop_body(self, name: str, body: list) -> str:
|
||||
"""Emit a function body as while-loop statements.
|
||||
|
||||
Replaces tail-self-calls with `continue` and non-recursive exits with
|
||||
`return`.
|
||||
"""
|
||||
if not body:
|
||||
return "return NIL;"
|
||||
# Emit side-effect statements first, then the tail expression as loop logic
|
||||
parts = []
|
||||
for b in body[:-1]:
|
||||
parts.append(self.emit_statement(b))
|
||||
parts.append(self._emit_tail_as_stmt(name, body[-1]))
|
||||
return "\n".join(parts)
|
||||
|
||||
def _emit_tail_as_stmt(self, name: str, expr) -> str:
    """Emit an expression in tail position as loop statements.

    Zero-argument tail self-calls become ``continue;``; every other exit
    becomes ``return <expr>;``.  ``do``/``begin``, ``if``, ``when``, ``cond``
    and ``let``/``let*`` are unfolded into JS statements so that their own
    tail positions are handled the same way.  Every emitted path ends in
    ``return`` or ``continue``, which the enclosing ``while(true)`` relies on.

    Args:
        name: Name of the function being turned into a loop.
        expr: The tail expression to emit.

    Returns:
        JS statement text (possibly several statements joined by newlines).
    """
    if not isinstance(expr, list) or not expr:
        return f"return {self.emit(expr)};"

    head = expr[0]
    if not isinstance(head, Symbol):
        return f"return {self.emit(expr)};"

    h = head.name

    # Direct zero-argument tail call to self → continue.  A call that carries
    # arguments is left as a real call so those arguments are still evaluated.
    if h == name and len(expr) == 1:
        return "continue;"

    # (do stmt1 stmt2 ... tail) → emit stmts then recurse on tail
    if h in ("do", "begin"):
        forms = expr[1:]
        if not forms:
            # An empty (do) has no tail form to return.
            return "return NIL;"
        stmts = [self.emit_statement(e) for e in forms[:-1]]
        stmts.append(self._emit_tail_as_stmt(name, forms[-1]))
        return "\n".join(stmts)

    # (if cond then else) → if/else with tail handling in each branch
    if h == "if":
        cond = self.emit(expr[1])
        then_branch = self._emit_tail_as_stmt(name, expr[2])
        else_branch = self._emit_tail_as_stmt(name, expr[3]) if len(expr) > 3 else "return NIL;"
        return f"if (isSxTruthy({cond})) {{ {then_branch} }} else {{ {else_branch} }}"

    # (when cond body...) → if (cond) { body... } else { return NIL; }
    if h == "when":
        cond = self.emit(expr[1])
        body_parts = expr[2:]
        if not body_parts:
            # The truthy branch must also leave the function: an empty block
            # here would fall through and spin forever in while(true).
            return f"if (isSxTruthy({cond})) {{ return NIL; }} else {{ return NIL; }}"
        stmts = [self.emit_statement(e) for e in body_parts[:-1]]
        stmts.append(self._emit_tail_as_stmt(name, body_parts[-1]))
        inner = "\n".join(stmts)
        return f"if (isSxTruthy({cond})) {{ {inner} }} else {{ return NIL; }}"

    # (cond clause1 clause2 ...) → if/else if/else chain
    if h == "cond":
        return self._emit_cond_as_loop_stmt(name, expr[1:])

    # (let ((bindings)) body...) → { var ...; tail }
    if h in ("let", "let*"):
        bindings = expr[1]
        body = expr[2:]
        parts = []
        # NOTE(review): bindings are emitted as JS `var`, which is
        # function-scoped, so closures created in the loop body would share
        # one binding across iterations — confirm no looped function relies on
        # a fresh binding per call.
        if isinstance(bindings, list):
            if bindings and isinstance(bindings[0], list):
                # Nested style: ((name value) (name value) ...)
                for b in bindings:
                    vname = b[0].name if isinstance(b[0], Symbol) else str(b[0])
                    parts.append(f"var {self._mangle(vname)} = {self.emit(b[1])};")
            else:
                # Flat style: (name value name value ...)
                for i in range(0, len(bindings), 2):
                    vname = bindings[i].name if isinstance(bindings[i], Symbol) else str(bindings[i])
                    parts.append(f"var {self._mangle(vname)} = {self.emit(bindings[i + 1])};")
        parts.extend(self.emit_statement(b_expr) for b_expr in body[:-1])
        # A let without body forms still has to exit the loop.
        parts.append(self._emit_tail_as_stmt(name, body[-1]) if body else "return NIL;")
        inner = "\n".join(parts)
        return f"{{ {inner} }}"

    # Not a tail call to self — regular return
    return f"return {self.emit(expr)};"
|
||||
|
||||
def _emit_cond_as_loop_stmt(self, name: str, clauses) -> str:
|
||||
"""Emit cond clauses as if/else if/else for loop body."""
|
||||
if not clauses:
|
||||
return "return NIL;"
|
||||
|
||||
# Detect style: Scheme vs Clojure (same as _emit_cond)
|
||||
is_scheme = (
|
||||
all(isinstance(c, list) and len(c) == 2 for c in clauses)
|
||||
and not any(isinstance(c, Keyword) for c in clauses)
|
||||
)
|
||||
if is_scheme:
|
||||
return self._cond_scheme_loop(name, clauses)
|
||||
return self._cond_clojure_loop(name, clauses)
|
||||
|
||||
def _cond_scheme_loop(self, name: str, clauses) -> str:
|
||||
parts = []
|
||||
for i, clause in enumerate(clauses):
|
||||
cond_expr = clause[0]
|
||||
body_expr = clause[1]
|
||||
# Check for :else / else
|
||||
is_else = (isinstance(cond_expr, Keyword) and cond_expr.name == "else") or \
|
||||
(isinstance(cond_expr, Symbol) and cond_expr.name == "else") or \
|
||||
(isinstance(cond_expr, bool) and cond_expr is True)
|
||||
if is_else:
|
||||
parts.append(f"{{ {self._emit_tail_as_stmt(name, body_expr)} }}")
|
||||
break
|
||||
prefix = "if" if i == 0 else "else if"
|
||||
cond = self.emit(cond_expr)
|
||||
body = self._emit_tail_as_stmt(name, body_expr)
|
||||
parts.append(f"{prefix} (isSxTruthy({cond})) {{ {body} }}")
|
||||
else:
|
||||
parts.append("else { return NIL; }")
|
||||
return " ".join(parts)
|
||||
|
||||
def _cond_clojure_loop(self, name: str, clauses) -> str:
|
||||
parts = []
|
||||
i = 0
|
||||
clause_idx = 0
|
||||
has_else = False
|
||||
while i < len(clauses):
|
||||
c = clauses[i]
|
||||
if isinstance(c, Keyword) and c.name == "else":
|
||||
if i + 1 < len(clauses):
|
||||
parts.append(f"else {{ {self._emit_tail_as_stmt(name, clauses[i + 1])} }}")
|
||||
has_else = True
|
||||
break
|
||||
if i + 1 < len(clauses):
|
||||
prefix = "if" if clause_idx == 0 else "else if"
|
||||
cond = self.emit(c)
|
||||
body = self._emit_tail_as_stmt(name, clauses[i + 1])
|
||||
parts.append(f"{prefix} (isSxTruthy({cond})) {{ {body} }}")
|
||||
i += 2
|
||||
else:
|
||||
parts.append(f"else {{ {self._emit_tail_as_stmt(name, c)} }}")
|
||||
has_else = True
|
||||
i += 1
|
||||
clause_idx += 1
|
||||
if not has_else:
|
||||
parts.append("else { return NIL; }")
|
||||
return " ".join(parts)
|
||||
|
||||
def _emit_for_each_stmt(self, expr) -> str:
|
||||
fn_expr = expr[1]
|
||||
coll_expr = expr[2]
|
||||
@@ -774,6 +976,7 @@ def extract_defines(source: str) -> list[tuple[str, list]]:
|
||||
|
||||
|
||||
ADAPTER_FILES = {
|
||||
"parser": ("parser.sx", "parser"),
|
||||
"html": ("adapter-html.sx", "adapter-html"),
|
||||
"sx": ("adapter-sx.sx", "adapter-sx"),
|
||||
"dom": ("adapter-dom.sx", "adapter-dom"),
|
||||
@@ -788,7 +991,8 @@ ADAPTER_DEPS = {
|
||||
"engine": ["dom"],
|
||||
"orchestration": ["engine", "dom"],
|
||||
"cssx": [],
|
||||
"boot": ["dom", "engine", "orchestration", "cssx"],
|
||||
"boot": ["dom", "engine", "orchestration", "cssx", "parser"],
|
||||
"parser": [],
|
||||
}
|
||||
|
||||
|
||||
@@ -805,6 +1009,7 @@ def compile_ref_to_js(adapters: list[str] | None = None) -> str:
|
||||
|
||||
# Platform JS blocks keyed by adapter name
|
||||
adapter_platform = {
|
||||
"parser": PLATFORM_PARSER_JS,
|
||||
"dom": PLATFORM_DOM_JS,
|
||||
"engine": PLATFORM_ENGINE_PURE_JS,
|
||||
"orchestration": PLATFORM_ORCHESTRATION_JS,
|
||||
@@ -830,7 +1035,7 @@ def compile_ref_to_js(adapters: list[str] | None = None) -> str:
|
||||
("eval.sx", "eval"),
|
||||
("render.sx", "render (core)"),
|
||||
]
|
||||
for name in ("html", "sx", "dom", "engine", "orchestration", "cssx", "boot"):
|
||||
for name in ("parser", "html", "sx", "dom", "engine", "orchestration", "cssx", "boot"):
|
||||
if name in adapter_set:
|
||||
sx_files.append(ADAPTER_FILES[name])
|
||||
|
||||
@@ -852,11 +1057,17 @@ def compile_ref_to_js(adapters: list[str] | None = None) -> str:
|
||||
has_orch = "orchestration" in adapter_set
|
||||
has_cssx = "cssx" in adapter_set
|
||||
has_boot = "boot" in adapter_set
|
||||
has_parser = "parser" in adapter_set
|
||||
adapter_label = "+".join(sorted(adapter_set)) if adapter_set else "core-only"
|
||||
|
||||
parts = []
|
||||
parts.append(PREAMBLE)
|
||||
parts.append(PLATFORM_JS)
|
||||
|
||||
# Parser platform must come before compiled parser.sx
|
||||
if has_parser:
|
||||
parts.append(adapter_platform["parser"])
|
||||
|
||||
for label, defines in all_sections:
|
||||
parts.append(f"\n // === Transpiled from {label} ===\n")
|
||||
for name, expr in defines:
|
||||
@@ -872,7 +1083,7 @@ def compile_ref_to_js(adapters: list[str] | None = None) -> str:
|
||||
parts.append(adapter_platform[name])
|
||||
|
||||
parts.append(fixups_js(has_html, has_sx, has_dom))
|
||||
parts.append(public_api_js(has_html, has_sx, has_dom, has_engine, has_orch, has_cssx, has_boot, adapter_label))
|
||||
parts.append(public_api_js(has_html, has_sx, has_dom, has_engine, has_orch, has_cssx, has_boot, has_parser, adapter_label))
|
||||
parts.append(EPILOGUE)
|
||||
return "\n".join(parts)
|
||||
|
||||
@@ -1393,6 +1604,26 @@ PLATFORM_JS = '''
|
||||
return NIL;
|
||||
}'''
|
||||
|
||||
# JavaScript source for the parser adapter's platform helpers: the
# identifier character-class predicates (isIdentStart / isIdentChar),
# parseNumber, escapeString and sxExprSource.  Kept as a raw string so the
# backslashes in the JS regexes and string escapes are emitted unchanged.
# compile_ref_to_js appends this block right after PLATFORM_JS when the
# "parser" adapter is selected, ahead of the transpiled sections.
PLATFORM_PARSER_JS = r"""
|
||||
// =========================================================================
|
||||
// Platform interface — Parser
|
||||
// =========================================================================
|
||||
// Character classification derived from the grammar:
|
||||
// ident-start → [a-zA-Z_~*+\-><=/!?&]
|
||||
// ident-char → ident-start + [0-9.:\/\[\]#,]
|
||||
|
||||
var _identStartRe = /[a-zA-Z_~*+\-><=/!?&]/;
|
||||
var _identCharRe = /[a-zA-Z0-9_~*+\-><=/!?.:&/\[\]#,]/;
|
||||
|
||||
function isIdentStart(ch) { return _identStartRe.test(ch); }
|
||||
function isIdentChar(ch) { return _identCharRe.test(ch); }
|
||||
function parseNumber(s) { return Number(s); }
|
||||
function escapeString(s) {
|
||||
return s.replace(/\\/g, "\\\\").replace(/"/g, '\\"').replace(/\n/g, "\\n").replace(/\t/g, "\\t");
|
||||
}
|
||||
function sxExprSource(e) { return typeof e === "string" ? e : String(e); }
|
||||
"""
|
||||
|
||||
PLATFORM_DOM_JS = """
|
||||
// =========================================================================
|
||||
// Platform interface — DOM adapter (browser-only)
|
||||
@@ -2203,18 +2434,11 @@ PLATFORM_CSSX_JS = """
|
||||
if (!cssTarget) return;
|
||||
|
||||
var rules = [];
|
||||
// Child-selector atoms are now routed to pseudoRules by the resolver
|
||||
// with selector ">:not(:first-child)", so base declarations are always
|
||||
// applied directly to the class.
|
||||
if (sv.declarations) {
|
||||
var hasChild = false;
|
||||
if (atoms) {
|
||||
for (var ai = 0; ai < atoms.length; ai++) {
|
||||
if (isChildSelectorAtom(atoms[ai])) { hasChild = true; break; }
|
||||
}
|
||||
}
|
||||
if (hasChild) {
|
||||
rules.push("." + sv.className + ">:not(:first-child){" + sv.declarations + "}");
|
||||
} else {
|
||||
rules.push("." + sv.className + "{" + sv.declarations + "}");
|
||||
}
|
||||
rules.push("." + sv.className + "{" + sv.declarations + "}");
|
||||
}
|
||||
for (var pi = 0; pi < sv.pseudoRules.length; pi++) {
|
||||
var sel = sv.pseudoRules[pi][0], decls = sv.pseudoRules[pi][1];
|
||||
@@ -2426,109 +2650,17 @@ def fixups_js(has_html, has_sx, has_dom):
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def public_api_js(has_html, has_sx, has_dom, has_engine, has_orch, has_cssx, has_boot, adapter_label):
|
||||
# Parser is always included
|
||||
parser = r'''
|
||||
// =========================================================================
|
||||
// Parser
|
||||
// =========================================================================
|
||||
|
||||
def public_api_js(has_html, has_sx, has_dom, has_engine, has_orch, has_cssx, has_boot, has_parser, adapter_label):
|
||||
# Parser: use compiled sxParse from parser.sx, or inline a minimal fallback
|
||||
if has_parser:
|
||||
parser = '''
|
||||
// Parser — compiled from parser.sx (see PLATFORM_PARSER_JS for ident char classes)
|
||||
var parse = sxParse;'''
|
||||
else:
|
||||
parser = r'''
|
||||
// Minimal fallback parser (no parser adapter)
|
||||
function parse(text) {
|
||||
var pos = 0;
|
||||
function skipWs() {
|
||||
while (pos < text.length) {
|
||||
var ch = text[pos];
|
||||
if (ch === " " || ch === "\t" || ch === "\n" || ch === "\r") { pos++; continue; }
|
||||
if (ch === ";") { while (pos < text.length && text[pos] !== "\n") pos++; continue; }
|
||||
break;
|
||||
}
|
||||
}
|
||||
function readExpr() {
|
||||
skipWs();
|
||||
if (pos >= text.length) return undefined;
|
||||
var ch = text[pos];
|
||||
if (ch === "(") { pos++; return readList(")"); }
|
||||
if (ch === "[") { pos++; return readList("]"); }
|
||||
if (ch === "{") { pos++; return readMap(); }
|
||||
if (ch === '"') return readString();
|
||||
if (ch === ":") return readKeyword();
|
||||
if (ch === "`") { pos++; return [new Symbol("quasiquote"), readExpr()]; }
|
||||
if (ch === ",") {
|
||||
pos++;
|
||||
if (pos < text.length && text[pos] === "@") { pos++; return [new Symbol("splice-unquote"), readExpr()]; }
|
||||
return [new Symbol("unquote"), readExpr()];
|
||||
}
|
||||
if (ch === "-" && pos + 1 < text.length && text[pos + 1] >= "0" && text[pos + 1] <= "9") return readNumber();
|
||||
if (ch >= "0" && ch <= "9") return readNumber();
|
||||
return readSymbol();
|
||||
}
|
||||
function readList(close) {
|
||||
var items = [];
|
||||
while (true) {
|
||||
skipWs();
|
||||
if (pos >= text.length) throw new Error("Unterminated list");
|
||||
if (text[pos] === close) { pos++; return items; }
|
||||
items.push(readExpr());
|
||||
}
|
||||
}
|
||||
function readMap() {
|
||||
var result = {};
|
||||
while (true) {
|
||||
skipWs();
|
||||
if (pos >= text.length) throw new Error("Unterminated map");
|
||||
if (text[pos] === "}") { pos++; return result; }
|
||||
var key = readExpr();
|
||||
var keyStr = (key && key._kw) ? key.name : String(key);
|
||||
result[keyStr] = readExpr();
|
||||
}
|
||||
}
|
||||
function readString() {
|
||||
pos++; // skip "
|
||||
var s = "";
|
||||
while (pos < text.length) {
|
||||
var ch = text[pos];
|
||||
if (ch === '"') { pos++; return s; }
|
||||
if (ch === "\\") { pos++; var esc = text[pos]; s += esc === "n" ? "\n" : esc === "t" ? "\t" : esc === "r" ? "\r" : esc; pos++; continue; }
|
||||
s += ch; pos++;
|
||||
}
|
||||
throw new Error("Unterminated string");
|
||||
}
|
||||
function readKeyword() {
|
||||
pos++; // skip :
|
||||
var name = readIdent();
|
||||
return new Keyword(name);
|
||||
}
|
||||
function readNumber() {
|
||||
var start = pos;
|
||||
if (text[pos] === "-") pos++;
|
||||
while (pos < text.length && text[pos] >= "0" && text[pos] <= "9") pos++;
|
||||
if (pos < text.length && text[pos] === ".") { pos++; while (pos < text.length && text[pos] >= "0" && text[pos] <= "9") pos++; }
|
||||
if (pos < text.length && (text[pos] === "e" || text[pos] === "E")) {
|
||||
pos++;
|
||||
if (pos < text.length && (text[pos] === "+" || text[pos] === "-")) pos++;
|
||||
while (pos < text.length && text[pos] >= "0" && text[pos] <= "9") pos++;
|
||||
}
|
||||
return Number(text.slice(start, pos));
|
||||
}
|
||||
function readIdent() {
|
||||
var start = pos;
|
||||
while (pos < text.length && /[a-zA-Z0-9_~*+\-><=/!?.:&]/.test(text[pos])) pos++;
|
||||
return text.slice(start, pos);
|
||||
}
|
||||
function readSymbol() {
|
||||
var name = readIdent();
|
||||
if (name === "true") return true;
|
||||
if (name === "false") return false;
|
||||
if (name === "nil") return NIL;
|
||||
return new Symbol(name);
|
||||
}
|
||||
var exprs = [];
|
||||
while (true) {
|
||||
skipWs();
|
||||
if (pos >= text.length) break;
|
||||
exprs.push(readExpr());
|
||||
}
|
||||
return exprs;
|
||||
throw new Error("Parser adapter not included — cannot parse SX source at runtime");
|
||||
}'''
|
||||
|
||||
# Public API — conditional on adapters
|
||||
|
||||
Reference in New Issue
Block a user