Files
rose-ash/hosts/ocaml/bootstrap.py
giles d735e28b39 Delete sx_ref.py — OCaml is the sole SX evaluator
Removes the 5993-line bootstrapped Python evaluator (sx_ref.py) and all
code that depended on it exclusively. Both bootstrappers (JS + OCaml)
now use a new synchronous OCaml bridge (ocaml_sync.py) to run the
transpiler. JS build produces identical output; OCaml bootstrap produces
byte-identical sx_ref.ml.

Key changes:
- New shared/sx/ocaml_sync.py: sync subprocess bridge to sx_server.exe
- hosts/javascript/bootstrap.py: serialize defines → temp file → OCaml eval
- hosts/ocaml/bootstrap.py: same pattern for OCaml transpiler
- shared/sx/{html,async_eval,resolver,jinja_bridge,handlers,pages,deps,helpers}:
  stub or remove sx_ref imports; runtime uses OCaml bridge (SX_USE_OCAML=1)
- sx/sxc/pages: parse defpage/defhandler from AST instead of Python eval
- hosts/ocaml/lib/sx_primitives.ml: append handles non-list 2nd arg per spec
- Deleted: sx_ref.py, async_eval_ref.py, 6 Python test runners, misc ref/ files

Test results: JS 1078/1078, OCaml 1114/1114.
sx_docs SSR has pre-existing rendering issues to investigate separately.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-24 14:32:55 +00:00

277 lines
9.8 KiB
Python

#!/usr/bin/env python3
"""
Bootstrap compiler: SX spec -> OCaml.
Loads the SX-to-OCaml transpiler (transpiler.sx), feeds it the spec files,
and produces sx_ref.ml — the transpiled evaluator as native OCaml.
Usage:
python3 hosts/ocaml/bootstrap.py --output hosts/ocaml/lib/sx_ref.ml
"""
from __future__ import annotations
import os
import sys
# Absolute path of the directory that contains this script.
_HERE = os.path.dirname(os.path.abspath(__file__))
# Two directory levels above this script; treated as the project root.
_PROJECT = os.path.abspath(os.path.join(_HERE, "..", ".."))
# Put the project root first on sys.path before the `shared.sx` imports that
# follow (presumably so they resolve when this file is run as a script).
sys.path.insert(0, _PROJECT)
from shared.sx.parser import parse_all
from shared.sx.types import Symbol
def extract_defines(source: str) -> list[tuple[str, list]]:
    """Collect the top-level ``define`` forms found in .sx source text.

    The source is parsed with ``parse_all``; every parsed expression that is
    a non-empty list whose head is the symbol ``define`` is kept.

    Returns a list of ``(name, expr)`` pairs in source order, where ``name``
    is the defined symbol's name (or ``str()`` of the second element when it
    is not a ``Symbol``) and ``expr`` is the complete define expression.
    """
    found = []
    for form in parse_all(source):
        # Only non-empty list forms can be (define ...) expressions.
        if not (isinstance(form, list) and form):
            continue
        head = form[0]
        if not isinstance(head, Symbol) or head.name != "define":
            continue
        target = form[1]
        label = target.name if isinstance(target, Symbol) else str(target)
        found.append((label, form))
    return found
# OCaml preamble — emitted verbatim as the first part of the generated
# sx_ref.ml, ahead of any transpiled code. In order, it provides:
#   * the "auto-generated, do not edit" banner and a [@@@warning "-26-27"]
#     attribute,
#   * `open Sx_types` / `open Sx_runtime`,
#   * the `trampoline_fn` forward reference and the `trampoline` wrapper
#     (FIXUPS assigns the real implementation later),
#   * `_strict_ref` / `_prim_param_types_ref`, the top-level refs that the
#     post-processing in compile_spec_to_ml redirects the transpiled
#     *strict* / *prim-param-types* code to use,
#   * `jit_call_hook`, the optional hook referenced by the JIT dispatch code
#     that compile_spec_to_ml injects.
PREAMBLE = """\
(* sx_ref.ml — Auto-generated from SX spec by hosts/ocaml/bootstrap.py *)
(* Do not edit — regenerate with: python3 hosts/ocaml/bootstrap.py *)
[@@@warning "-26-27"]
open Sx_types
open Sx_runtime
(* Trampoline — forward ref, resolved after eval_expr is defined. *)
let trampoline_fn : (value -> value) ref = ref (fun v -> v)
let trampoline v = !trampoline_fn v
(* === Mutable state for strict mode === *)
(* These are defined as top-level refs because the transpiler cannot handle
global set! mutation (it creates local refs that shadow the global). *)
let _strict_ref = ref (Bool false)
let _prim_param_types_ref = ref Nil
(* JIT call hook — cek_call checks this before CEK dispatch for named
lambdas. Registered by sx_server.ml after compiler loads. Tests
run with hook = None (pure CEK, no compilation dependency). *)
let jit_call_hook : (value -> value list -> value option) option ref = ref None
"""
# OCaml fixups — appended verbatim after the transpiled code, so they can
# refer to functions the transpiled evaluator defines (eval_expr,
# cek_terminal_p, cek_step, cek_value). They:
#   * point `trampoline_fn` (declared in PREAMBLE) at an implementation that
#     resolves Thunk values via eval_expr,
#   * copy that same function into Sx_primitives._sx_trampoline_fn,
#   * define `cek_run_iterative`, a while-loop driver over cek_step.
# NOTE: no JIT wiring lives in this string. The `jit_call_hook` ref is
# declared in PREAMBLE and the dispatch code is injected by a string
# replacement inside compile_spec_to_ml.
FIXUPS = """\
(* Wire up trampoline to resolve thunks via the CEK machine *)
let () = trampoline_fn := (fun v ->
match v with
| Thunk (expr, env) -> eval_expr expr (Env env)
| _ -> v)
(* Wire up the primitives trampoline so call_any in HO forms resolves Thunks *)
let () = Sx_primitives._sx_trampoline_fn := !trampoline_fn
(* Override recursive cek_run with iterative loop *)
let cek_run_iterative state =
let s = ref state in
while not (match cek_terminal_p !s with Bool true -> true | _ -> false) do
s := cek_step !s
done;
cek_value !s
"""
def compile_spec_to_ml(spec_dir: str | None = None) -> str:
    """Compile the SX spec to OCaml source.

    Starts an ``OcamlSync`` bridge, loads ``transpiler.sx`` into it, feeds it
    the top-level defines of each spec file, and concatenates ``PREAMBLE``,
    the transpiled code and ``FIXUPS``. The joined text is then patched with
    a series of textual fixups (mutable globals, ``make-env`` result, JIT
    dispatch injection).

    Args:
        spec_dir: Directory holding the spec ``.sx`` files. Defaults to
            ``<project root>/spec``.

    Returns:
        The complete text of the generated OCaml module.

    A spec file that does not exist is skipped with a warning on stderr.
    Errors raised by the bridge or the file system propagate to the caller;
    the bridge is stopped and the temporary file removed in either case.
    """
    import re
    import tempfile

    from shared.sx.ocaml_sync import OcamlSync
    from shared.sx.parser import serialize

    if spec_dir is None:
        spec_dir = os.path.join(_PROJECT, "spec")

    # Spec files to transpile (in dependency order).
    # stdlib.sx functions are already registered as OCaml primitives —
    # only the evaluator needs transpilation.
    sx_files = [
        ("evaluator.sx", "evaluator (frames + eval + CEK)"),
    ]

    # Defines that must not be transpiled: those provided by the preamble or
    # fixups, math primitives, and stdlib functions that use loop/named-let
    # (the transpiler can't handle those yet).
    skip = {"trampoline", "ceil", "floor", "round", "abs", "min", "max",
            "debug-log", "debug_log", "range", "chunk-every", "zip-pairs",
            "string-contains?", "starts-with?", "ends-with?",
            "string-replace", "trim", "split", "index-of",
            "pad-left", "pad-right", "char-at", "substring"}

    parts = [PREAMBLE]

    bridge = OcamlSync()
    try:
        # Load the transpiler into the OCaml kernel.
        bridge.load(os.path.join(_HERE, "transpiler.sx"))

        for filename, label in sx_files:
            filepath = os.path.join(spec_dir, filename)
            if not os.path.exists(filepath):
                # `sys` is the module-level import. Do not add a local
                # `import sys` anywhere in this function: it would make the
                # name function-local and break this line.
                print(f"Warning: {filepath} not found, skipping", file=sys.stderr)
                continue
            with open(filepath) as f:
                src = f.read()

            defines = [(n, e) for n, e in extract_defines(src) if n not in skip]

            # Deduplicate — keep the last definition for each name
            # (CEK overrides tree-walk).
            last_index = {n: i for i, (n, _) in enumerate(defines)}
            defines = [(n, e) for i, (n, e) in enumerate(defines)
                       if last_index[n] == i]

            # Build the defines list and known names for the transpiler.
            defines_list = [[name, expr] for name, expr in defines]
            known_names = [name for name, _ in defines]

            # Serialize defines + known names to a temp file and load it into
            # the kernel. The file is removed even if writing or loading fails.
            defines_sx = serialize(defines_list)
            known_sx = serialize(known_names)
            tmp = tempfile.NamedTemporaryFile(mode="w", suffix=".sx", delete=False)
            try:
                with tmp:
                    tmp.write(f"(define _defines '{defines_sx})\n")
                    tmp.write(f"(define _known_defines '{known_sx})\n")
                # The file is closed (and flushed) here, before the kernel reads it.
                bridge.load(tmp.name)
            finally:
                os.unlink(tmp.name)

            # Call ml-translate-file — emits as single let rec block.
            result = bridge.eval("(ml-translate-file _defines)")
            parts.append(f"\n(* === Transpiled from {label} === *)\n")
            parts.append(result)
    finally:
        # Always shut the OCaml subprocess down, including on failure.
        bridge.stop()

    parts.append(FIXUPS)
    output = "\n".join(parts)

    # Post-process: fix mutable globals that the transpiler can't handle.
    # The transpiler emits local refs for set! targets within functions,
    # but top-level globals (*strict*, *prim-param-types*) need to use
    # the pre-declared refs from the preamble.

    # Fix *strict*: use _strict_ref instead of immutable let rec binding.
    output = re.sub(
        r'and _strict_ =\n \(Bool false\)',
        'and _strict_ = !_strict_ref',
        output,
    )
    # Fix set-strict!: use _strict_ref instead of local ref.
    output = re.sub(
        r'and set_strict_b val\' =\n let _strict_ = ref Nil in \(_strict_ := val\'; Nil\)',
        "and set_strict_b val' =\n _strict_ref := val'; Nil",
        output,
    )
    # Fix *prim-param-types*: use _prim_param_types_ref.
    output = re.sub(
        r'and _prim_param_types_ =\n Nil',
        'and _prim_param_types_ = !_prim_param_types_ref',
        output,
    )
    # Fix set-prim-param-types!: use _prim_param_types_ref.
    output = re.sub(
        r'and set_prim_param_types_b types =\n let _prim_param_types_ = ref Nil in \(_prim_param_types_ := types; Nil\)',
        "and set_prim_param_types_b types =\n _prim_param_types_ref := types; Nil",
        output,
    )

    # Fix all runtime reads of _strict_ and _prim_param_types_ to deref
    # the mutable refs instead of using the stale let-rec bindings.
    # This is needed because let-rec value bindings capture initial values.
    # The identifier must be preceded by a space or "(" and followed by a
    # space, ")" or ";", so names that merely contain it (set_strict_b,
    # _strict_ref) are left alone. An occurrence at the very start of a line
    # is not matched.
    strict_read = re.compile(r'(?<=[ (])_strict_(?=[ );])')
    param_types_read = re.compile(r'(?<=[ (])_prim_param_types_(?=[ );])')
    definition_prefixes = ('and _strict_ =', 'and _prim_param_types_ =')

    def fix_mutable_reads(text):
        fixed = []
        for line in text.split('\n'):
            # Leave the (already patched) definition lines untouched.
            if not line.strip().startswith(definition_prefixes):
                line = strict_read.sub('!_strict_ref', line)
                line = param_types_read.sub('!_prim_param_types_ref', line)
            fixed.append(line)
        return '\n'.join(fixed)

    output = fix_mutable_reads(output)

    # Fix cek_call: the spec passes (make-env) as the env arg to
    # continue_with_call, but the transpiler evaluates make-env at
    # transpile time (it's a primitive), producing Dict instead of Env.
    output = output.replace(
        "((Dict (Hashtbl.create 0))) (a) ((List []))",
        "(Env (Sx_types.make_env ())) (a) ((List []))",
    )

    # Inject JIT dispatch into continue_with_call's lambda branch.
    # After params are bound, check jit_call_hook before creating CEK state.
    lambda_body_pattern = (
        '(prim_call "slice" [params; (len (args))])); Nil)) in '
        '(make_cek_state ((lambda_body (f))) (local) (kont))'
    )
    lambda_body_jit = (
        '(prim_call "slice" [params; (len (args))])); Nil)) in '
        '(match !jit_call_hook, f with '
        '| Some hook, Lambda l when l.l_name <> None -> '
        'let args_list = match args with '
        'List a | ListRef { contents = a } -> a | _ -> [] in '
        '(match hook f args_list with '
        'Some result -> make_cek_value result local kont '
        '| None -> make_cek_state (lambda_body f) local kont) '
        '| _ -> make_cek_state ((lambda_body (f))) (local) (kont))'
    )
    if lambda_body_pattern in output:
        # Only the first occurrence is rewritten.
        output = output.replace(lambda_body_pattern, lambda_body_jit, 1)
    else:
        print("WARNING: Could not find lambda body pattern for JIT injection", file=sys.stderr)

    return output
def main():
    """Command-line entry point: transpile the spec and emit the OCaml text.

    With ``--output``/``-o`` the result is written to that file and a short
    "Wrote <path> (<size> bytes)" note goes to stderr; without it the result
    is printed to stdout.
    """
    import argparse

    cli = argparse.ArgumentParser(description="Bootstrap SX spec -> OCaml")
    cli.add_argument(
        "--output", "-o",
        default=None,
        help="Output file (default: stdout)",
    )
    opts = cli.parse_args()

    ml_source = compile_spec_to_ml()

    if not opts.output:
        print(ml_source)
        return

    with open(opts.output, "w") as out:
        out.write(ml_source)
    written = os.path.getsize(opts.output)
    print(f"Wrote {opts.output} ({written} bytes)", file=sys.stderr)


if __name__ == "__main__":
    main()