Bootstrap SX bytecode compiler to native OCaml
Transpile lib/compiler.sx → hosts/ocaml/lib/sx_compiler.ml (42 functions). The bytecode compiler now runs as native OCaml instead of interpreted SX, eliminating the 24s JIT warm-up for compiler functions.

- bootstrap_compiler.py: transpiler script (like bootstrap.py for evaluator)
- sx_compiler.ml: 39KB native compiler (compile, compile-module, etc.)
- Bind compile/compile-module as native functions in setup_core_operations
- Add mutable_list to sx_runtime.ml (used by compiler pool)
- Add native parse function (wraps Sx_parser.parse_all)
- compile-match delegated via ref (uses letrec, transpiler can't handle)
- Compile all 23 bytecode modules successfully (was 0/23 due to WASM overflow)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
151
hosts/ocaml/bootstrap_compiler.py
Normal file
151
hosts/ocaml/bootstrap_compiler.py
Normal file
@@ -0,0 +1,151 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Bootstrap the SX bytecode compiler to native OCaml.
|
||||
|
||||
Loads the SX-to-OCaml transpiler (transpiler.sx), feeds it compiler.sx,
|
||||
and produces sx_compiler.ml — the bytecode compiler as native OCaml.
|
||||
|
||||
Usage:
|
||||
python3 hosts/ocaml/bootstrap_compiler.py
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Directory containing this script.
_HERE = os.path.dirname(os.path.abspath(__file__))
# Two directory levels above this script; used as the project root.
_PROJECT = os.path.abspath(os.path.join(_HERE, os.pardir, os.pardir))
# Put the project root first on the import path so the `shared.*` imports
# that follow can be resolved when this file is run directly as a script.
sys.path.insert(0, _PROJECT)
|
||||
|
||||
from shared.sx.parser import parse_all
|
||||
from shared.sx.types import Symbol
|
||||
|
||||
|
||||
def extract_defines(source: str) -> list[tuple[str, list]]:
    """Collect the top-level ``define`` forms found in SX source text.

    Args:
        source: Text of an ``.sx`` file.

    Returns:
        ``(name, define_expr)`` pairs in source order.  ``name`` is the
        ``.name`` of the form's second element when that element is a
        ``Symbol``, otherwise ``str()`` of that element; ``define_expr`` is
        the whole parsed form.
    """
    found = []
    for form in parse_all(source):
        # Only a non-empty list can be a candidate form.
        if not (isinstance(form, list) and form):
            continue
        head = form[0]
        # The form must be headed by the symbol `define`.
        if not isinstance(head, Symbol) or head.name != "define":
            continue
        target = form[1]
        label = target.name if isinstance(target, Symbol) else str(target)
        found.append((label, form))
    return found
|
||||
|
||||
|
||||
PREAMBLE = """\
|
||||
(* sx_compiler.ml — Auto-generated from lib/compiler.sx *)
|
||||
(* Do not edit — regenerate with: python3 hosts/ocaml/bootstrap_compiler.py *)
|
||||
|
||||
[@@@warning "-26-27"]
|
||||
|
||||
open Sx_types
|
||||
open Sx_runtime
|
||||
|
||||
(* The compiler uses cek_call from the evaluator for runtime dispatch *)
|
||||
let cek_call = Sx_ref.cek_call
|
||||
let eval_expr = Sx_ref.eval_expr
|
||||
let trampoline v = match v with
|
||||
| Thunk (expr, env) -> Sx_ref.eval_expr expr (Env env)
|
||||
| other -> other
|
||||
|
||||
(* Bindings for external functions the compiler calls.
|
||||
Some shadow OCaml stdlib names — the SX versions operate on values. *)
|
||||
let serialize v = String (Sx_types.inspect v)
|
||||
let sx_parse v = match v with
|
||||
| String s -> (match Sx_parser.parse_all s with [e] -> e | es -> List es)
|
||||
| v -> v
|
||||
let floor v = prim_call "floor" [v]
|
||||
let abs v = prim_call "abs" [v]
|
||||
let min a b = prim_call "min" [a; b]
|
||||
let max a b = prim_call "max" [a; b]
|
||||
let set_nth_b lst idx v = prim_call "set-nth!" [lst; idx; v]
|
||||
|
||||
(* skip_annotations: strips :keyword value pairs from a list (type annotations) *)
|
||||
let rec skip_annotations items =
|
||||
match items with
|
||||
| List [] | Nil -> Nil
|
||||
| List (Keyword _ :: _ :: rest) -> skip_annotations (List rest)
|
||||
| ListRef { contents = [] } -> Nil
|
||||
| ListRef { contents = Keyword _ :: _ :: rest } -> skip_annotations (List rest)
|
||||
| List (first :: _) -> first
|
||||
| ListRef { contents = first :: _ } -> first
|
||||
| _ -> Nil
|
||||
|
||||
(* compile_match: uses local recursion (letrec) that the transpiler can't handle.
|
||||
Falls back to CEK evaluation at runtime. *)
|
||||
let compile_match em args scope tail_p =
|
||||
let fn = Sx_ref.eval_expr (Symbol "compile-match") (Env (Sx_types.make_env ())) in
|
||||
ignore (Sx_ref.cek_call fn (List [em; args; scope; tail_p]))
|
||||
|
||||
"""
|
||||
|
||||
|
||||
def main():
    """Regenerate ``hosts/ocaml/lib/sx_compiler.ml`` from ``lib/compiler.sx``.

    Steps:
      1. Start an ``OcamlSync`` kernel and load ``transpiler.sx`` into it.
      2. Extract the top-level defines from ``compiler.sx``, drop the names
         in the skip set and keep only the last definition of each name.
      3. Pass the defines to the kernel through a temporary ``.sx`` file and
         evaluate ``(ml-translate-file _defines)``.
      4. Prepend ``PREAMBLE``, patch the generated local ``skip_annotations``
         binding, and write the result to disk.

    All files are read and written as UTF-8 explicitly: ``PREAMBLE`` itself
    contains non-ASCII characters, so relying on the locale's default
    encoding could fail or produce different output per machine.

    Progress messages go to stderr.

    Raises:
        ValueError: if the start marker of the ``skip_annotations`` patch is
            present in the generated code but the end marker is not.
    """
    import tempfile
    from shared.sx.ocaml_sync import OcamlSync
    from shared.sx.parser import serialize

    bridge = OcamlSync()
    try:
        # Load the transpiler into the OCaml kernel.
        bridge.load(os.path.join(_HERE, "transpiler.sx"))

        # Read compiler.sx
        compiler_path = os.path.join(_PROJECT, "lib", "compiler.sx")
        with open(compiler_path, encoding="utf-8") as f:
            src = f.read()
        defines = extract_defines(src)

        # Skip functions that use letrec/named-let (transpiler can't handle)
        skip = {"compile-match"}
        defines = [(n, e) for n, e in defines if n not in skip]

        # Deduplicate (keep last definition): later entries overwrite earlier
        # ones in the mapping, so each name ends up with its last index.
        last_index = {n: i for i, (n, _) in enumerate(defines)}
        defines = [
            (n, e) for i, (n, e) in enumerate(defines) if last_index[n] == i
        ]

        print(f"Transpiling {len(defines)} defines from compiler.sx...", file=sys.stderr)

        # Build the defines list and known names for the transpiler
        defines_list = [[name, expr] for name, expr in defines]
        known_names = [name for name, _ in defines]

        # Serialize to temp file, load into kernel
        defines_sx = serialize(defines_list)
        known_sx = serialize(known_names)
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".sx", delete=False, encoding="utf-8"
        ) as tmp:
            tmp.write(f"(define _defines '{defines_sx})\n")
            tmp.write(f"(define _known_defines '{known_sx})\n")
            tmp_path = tmp.name
        try:
            bridge.load(tmp_path)
        finally:
            # delete=False above, so the file must be removed by hand.
            os.unlink(tmp_path)

        # Call ml-translate-file — emits as single let rec block
        result = bridge.eval("(ml-translate-file _defines)")
    finally:
        # Always shut the kernel down, including when a load/eval step fails.
        # NOTE(review): assumes stop() is safe to call after a failed load —
        # confirm against OcamlSync.
        bridge.stop()

    output = PREAMBLE + "\n(* === Transpiled from bytecode compiler === *)\n" + result + "\n"

    # Post-process: fix skip_annotations local NativeFn → use top-level.
    # Everything from the start marker through the end marker is replaced by
    # a direct call to the skip_annotations defined in PREAMBLE.
    old = 'then (let skip_annotations = (NativeFn ('
    if old in output:
        idx = output.index(old)
        end_marker = 'in (skip_annotations (rest_args)))'
        end_idx = output.index(end_marker, idx)
        output = output[:idx] + 'then (skip_annotations (rest_args))' + output[end_idx + len(end_marker):]

    # Write output
    out_path = os.path.join(_HERE, "lib", "sx_compiler.ml")
    with open(out_path, "w", encoding="utf-8") as f:
        f.write(output)
    # Report the real on-disk size: len(output) counts characters, which
    # differs from the byte count once non-ASCII text is present.
    print(f"Wrote {os.path.getsize(out_path)} bytes to {out_path}", file=sys.stderr)
    print(f"  {len(defines)} functions transpiled", file=sys.stderr)
|
||||
|
||||
|
||||
# Script entry point: run the bootstrap only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user