Files
rose-ash/hosts/ocaml/bootstrap_compiler.py
giles a24efc1a00 Bootstrap SX bytecode compiler to native OCaml
Transpile lib/compiler.sx → hosts/ocaml/lib/sx_compiler.ml (42 functions).
The bytecode compiler now runs as native OCaml instead of interpreted SX,
eliminating the 24s JIT warm-up for compiler functions.

- bootstrap_compiler.py: transpiler script (like bootstrap.py for evaluator)
- sx_compiler.ml: 39KB native compiler (compile, compile-module, etc.)
- Bind compile/compile-module as native functions in setup_core_operations
- Add mutable_list to sx_runtime.ml (used by compiler pool)
- Add native parse function (wraps Sx_parser.parse_all)
- compile-match delegated via ref (uses letrec, transpiler can't handle)
- Compile all 23 bytecode modules successfully (was 0/23 due to WASM overflow)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-30 10:30:53 +00:00

152 lines
5.1 KiB
Python

#!/usr/bin/env python3
"""
Bootstrap the SX bytecode compiler to native OCaml.
Loads the SX-to-OCaml transpiler (transpiler.sx), feeds it compiler.sx,
and produces sx_compiler.ml — the bytecode compiler as native OCaml.
Usage:
python3 hosts/ocaml/bootstrap_compiler.py
"""
from __future__ import annotations
import os
import sys
# Absolute path of the directory that contains this script.
_HERE = os.path.dirname(os.path.abspath(__file__))
# Two directory levels above this script; used as the root for project-relative
# paths (e.g. lib/compiler.sx) and for package imports.
_PROJECT = os.path.abspath(os.path.join(_HERE, "..", ".."))
# Put that root first on sys.path before the `shared.sx` imports that follow.
# NOTE(review): presumably `shared` lives directly under _PROJECT — confirm.
sys.path.insert(0, _PROJECT)
from shared.sx.parser import parse_all
from shared.sx.types import Symbol
def extract_defines(source: str) -> list[tuple[str, list]]:
    """Collect the top-level ``(define ...)`` forms found in .sx source text.

    Args:
        source: Text of an .sx file.

    Returns:
        ``(name, expr)`` pairs in source order, where ``expr`` is the whole
        parsed define form and ``name`` is the ``.name`` of its second
        element when that element is a ``Symbol``, otherwise ``str()`` of it.
    """
    collected = []
    for form in parse_all(source):
        # Only non-empty list forms can be a define.
        if not isinstance(form, list) or not form:
            continue
        head = form[0]
        if not isinstance(head, Symbol) or head.name != "define":
            continue
        target = form[1]
        if isinstance(target, Symbol):
            label = target.name
        else:
            label = str(target)
        collected.append((label, form))
    return collected
# OCaml source text that main() places at the very start of the generated
# sx_compiler.ml, ahead of the transpiler's output.
#
# It is written by hand rather than transpiled and provides, in order:
#   - the "do not edit" header, a warning pragma and the `open` lines;
#   - aliases for Sx_ref.cek_call / Sx_ref.eval_expr and a `trampoline` helper;
#   - small wrappers (serialize, sx_parse, floor, abs, min, max, set_nth_b),
#     most of them forwarding to `prim_call`;
#   - `skip_annotations`, which main()'s post-processing step calls into;
#   - `compile_match`, which looks up "compile-match" through the evaluator
#     instead of being transpiled (main() skips that define).
#
# The backslash right after the opening quotes keeps the text from starting
# with an empty line. The string is emitted verbatim, so any change here
# changes the generated OCaml file.
PREAMBLE = """\
(* sx_compiler.ml — Auto-generated from lib/compiler.sx *)
(* Do not edit — regenerate with: python3 hosts/ocaml/bootstrap_compiler.py *)
[@@@warning "-26-27"]
open Sx_types
open Sx_runtime
(* The compiler uses cek_call from the evaluator for runtime dispatch *)
let cek_call = Sx_ref.cek_call
let eval_expr = Sx_ref.eval_expr
let trampoline v = match v with
| Thunk (expr, env) -> Sx_ref.eval_expr expr (Env env)
| other -> other
(* Bindings for external functions the compiler calls.
Some shadow OCaml stdlib names — the SX versions operate on values. *)
let serialize v = String (Sx_types.inspect v)
let sx_parse v = match v with
| String s -> (match Sx_parser.parse_all s with [e] -> e | es -> List es)
| v -> v
let floor v = prim_call "floor" [v]
let abs v = prim_call "abs" [v]
let min a b = prim_call "min" [a; b]
let max a b = prim_call "max" [a; b]
let set_nth_b lst idx v = prim_call "set-nth!" [lst; idx; v]
(* skip_annotations: strips :keyword value pairs from a list (type annotations) *)
let rec skip_annotations items =
match items with
| List [] | Nil -> Nil
| List (Keyword _ :: _ :: rest) -> skip_annotations (List rest)
| ListRef { contents = [] } -> Nil
| ListRef { contents = Keyword _ :: _ :: rest } -> skip_annotations (List rest)
| List (first :: _) -> first
| ListRef { contents = first :: _ } -> first
| _ -> Nil
(* compile_match: uses local recursion (letrec) that the transpiler can't handle.
Falls back to CEK evaluation at runtime. *)
let compile_match em args scope tail_p =
let fn = Sx_ref.eval_expr (Symbol "compile-match") (Env (Sx_types.make_env ())) in
ignore (Sx_ref.cek_call fn (List [em; args; scope; tail_p]))
"""
def _keep_last_definitions(defines: list[tuple[str, list]]) -> list[tuple[str, list]]:
    """Return *defines* with duplicate names collapsed to their last occurrence.

    The relative order of the surviving entries is preserved.
    """
    last_index = {name: i for i, (name, _) in enumerate(defines)}
    return [
        (name, expr)
        for i, (name, expr) in enumerate(defines)
        if last_index[name] == i
    ]


def _inline_skip_annotations(output: str) -> str:
    """Swap the transpiled local ``skip_annotations`` closure for a direct call.

    The first span running from the ``let skip_annotations = (NativeFn (``
    marker to the matching ``in (skip_annotations (rest_args)))`` marker is
    replaced with a plain call, so the generated code uses the top-level
    ``skip_annotations`` from the preamble. Text without the start marker is
    returned unchanged.

    Raises:
        ValueError: if the start marker is present but the end marker is not.
    """
    start_marker = 'then (let skip_annotations = (NativeFn ('
    end_marker = 'in (skip_annotations (rest_args)))'
    start = output.find(start_marker)
    if start == -1:
        return output
    end = output.find(end_marker, start)
    if end == -1:
        raise ValueError(
            "transpiled output contains a local skip_annotations definition "
            "but not its expected closing call; cannot post-process"
        )
    return (
        output[:start]
        + 'then (skip_annotations (rest_args))'
        + output[end + len(end_marker):]
    )


def main():
    """Transpile lib/compiler.sx into hosts/ocaml/lib/sx_compiler.ml.

    Loads transpiler.sx into an OCaml kernel, hands it the top-level defines
    of compiler.sx (minus the skipped ones, last definition winning), asks it
    to translate them, prepends PREAMBLE, applies the skip_annotations
    post-processing fix and writes the result. Progress is reported on stderr.

    Raises:
        ValueError: if the post-processing markers are inconsistent.
        OSError: if a source or destination file cannot be read or written.
    """
    import tempfile

    from shared.sx.ocaml_sync import OcamlSync
    from shared.sx.parser import serialize

    # Load the transpiler into OCaml kernel
    bridge = OcamlSync()
    try:
        bridge.load(os.path.join(_HERE, "transpiler.sx"))

        # Read compiler.sx (explicit encoding: do not depend on the locale)
        compiler_path = os.path.join(_PROJECT, "lib", "compiler.sx")
        with open(compiler_path, encoding="utf-8") as f:
            src = f.read()

        # Skip functions that use letrec/named-let (transpiler can't handle)
        skip = {"compile-match"}
        defines = [(n, e) for n, e in extract_defines(src) if n not in skip]
        # Deduplicate (keep last definition)
        defines = _keep_last_definitions(defines)
        print(f"Transpiling {len(defines)} defines from compiler.sx...", file=sys.stderr)

        # Build the defines list and known names for the transpiler
        defines_list = [[name, expr] for name, expr in defines]
        known_names = [name for name, _ in defines]

        # Serialize to temp file, load into kernel
        defines_sx = serialize(defines_list)
        known_sx = serialize(known_names)
        tmp = tempfile.NamedTemporaryFile(
            mode="w", suffix=".sx", delete=False, encoding="utf-8"
        )
        # Capture the path before writing so the file is removed even if a
        # write fails part-way through.
        tmp_path = tmp.name
        try:
            with tmp:
                tmp.write(f"(define _defines '{defines_sx})\n")
                tmp.write(f"(define _known_defines '{known_sx})\n")
            bridge.load(tmp_path)
        finally:
            os.unlink(tmp_path)

        # Call ml-translate-file — emits as single let rec block
        result = bridge.eval("(ml-translate-file _defines)")
    finally:
        # Always shut the kernel down, including when a load/eval step fails.
        bridge.stop()

    output = PREAMBLE + "\n(* === Transpiled from bytecode compiler === *)\n" + result + "\n"
    # Post-process: fix skip_annotations local NativeFn → use top-level
    output = _inline_skip_annotations(output)

    # Write output; PREAMBLE contains non-ASCII characters, so pin the encoding.
    out_path = os.path.join(_HERE, "lib", "sx_compiler.ml")
    with open(out_path, "w", encoding="utf-8") as f:
        f.write(output)
    # Report the encoded size so the "bytes" figure matches what is on disk.
    written = len(output.encode("utf-8"))
    print(f"Wrote {written} bytes to {out_path}", file=sys.stderr)
    print(f"  {len(defines)} functions transpiled", file=sys.stderr)


# Run the bootstrap only when this file is executed as a script.
if __name__ == "__main__":
    main()