Files
rose-ash/hosts/ocaml/bootstrap_vm.py
giles fc2b5e502f Step 5p6 lazy loading + Step 6b VM transpilation prep
Lazy module loading (Step 5 piece 6 completion):
- Add define-library wrappers + import declarations to 13 source .sx files
- compile-modules.js generates module-manifest.json with dependency graph
- compile-modules.js strips define-library/import before bytecode compilation
  (VM doesn't handle these as special forms)
- sx-platform.js replaces hardcoded 24-file loadWebStack() with manifest-driven
  recursive loader — only downloads modules the page needs
- Result: 12 modules loaded (was 24), zero errors, zero warnings
- Fallback to full load if manifest missing

VM transpilation prep (Step 6b):
- Refactor lib/vm.sx: 20 accessor functions replace raw dict access
- Factor out collect-n-from-stack, collect-n-pairs, pad-n-nils helpers
- bootstrap_vm.py: transpiles 9 VM logic functions to OCaml
- sx_vm_ref.ml: proof that vm.sx transpiles (preamble has stubs)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-04 12:18:41 +00:00

280 lines
9.7 KiB
Python

#!/usr/bin/env python3
"""
Bootstrap the SX bytecode VM to native OCaml.
Loads the SX-to-OCaml transpiler (transpiler.sx), feeds it the logic
functions from lib/vm.sx, and produces sx_vm_ref.ml.
Type construction and performance-critical functions stay as native OCaml
in the preamble. Logic (opcode dispatch, call routing, execution loop)
is transpiled from SX.
Usage:
python3 hosts/ocaml/bootstrap_vm.py
"""
from __future__ import annotations
import os
import sys
import tempfile
_HERE = os.path.dirname(os.path.abspath(__file__))
_PROJECT = os.path.abspath(os.path.join(_HERE, "..", ".."))
sys.path.insert(0, _PROJECT)
from shared.sx.parser import parse_all, serialize
from shared.sx.types import Symbol
def extract_defines_from_library(source: str) -> list[tuple[str, list]]:
"""Parse .sx source with define-library wrapper, extract defines from begin body."""
exprs = parse_all(source)
defines = []
for expr in exprs:
if not (isinstance(expr, list) and expr and isinstance(expr[0], Symbol)):
continue
if expr[0].name == "define":
name = expr[1].name if isinstance(expr[1], Symbol) else str(expr[1])
defines.append((name, expr))
elif expr[0].name == "define-library":
# Extract defines from (begin ...) declarations
for decl in expr[2:]:
if isinstance(decl, list) and decl and isinstance(decl[0], Symbol) and decl[0].name == "begin":
for form in decl[1:]:
if isinstance(form, list) and form and isinstance(form[0], Symbol) and form[0].name == "define":
name = form[1].name if isinstance(form[1], Symbol) else str(form[1])
defines.append((name, form))
return defines
# Functions provided by the native OCaml preamble — skip from transpilation.
# These handle type construction and performance-critical ops.
SKIP = {
# Type construction
"make-upvalue-cell", "uv-get", "uv-set!",
"make-vm-code", "make-vm-closure", "make-vm-frame", "make-vm",
# Stack ops
"vm-push", "vm-pop", "vm-peek",
# Frame ops
"frame-read-u8", "frame-read-u16", "frame-read-i16",
"frame-local-get", "frame-local-set",
"frame-upvalue-get", "frame-upvalue-set",
# Accessors (native OCaml field access)
"frame-ip", "frame-set-ip!", "frame-base", "frame-closure",
"closure-code", "closure-upvalues", "closure-env",
"code-bytecode", "code-constants", "code-locals",
"vm-sp", "vm-set-sp!", "vm-stack", "vm-set-stack!",
"vm-frames", "vm-set-frames!", "vm-globals-ref",
# Global ops
"vm-global-get", "vm-global-set",
# Complex native ops
"vm-push-frame", "code-from-value", "vm-closure?",
"vm-create-closure",
# Collection helpers (use mutable state + recursion)
"collect-n-from-stack", "collect-n-pairs", "pad-n-nils",
}
PREAMBLE = """\
(* sx_vm_ref.ml — Auto-generated from lib/vm.sx *)
(* Do not edit — regenerate with: python3 hosts/ocaml/bootstrap_vm.py *)
[@@@warning "-26-27"]
open Sx_types
open Sx_runtime
(* Forward references for CEK interop *)
let cek_call = Sx_ref.cek_call
let eval_expr = Sx_ref.eval_expr
let trampoline v = match v with
| Thunk (expr, env) -> Sx_ref.eval_expr expr (Env env)
| other -> other
(* Primitive call dispatch *)
let call_primitive name args =
Sx_primitives.prim_call (value_to_string name) (list_to_ocaml_list args)
(* ================================================================
Preamble: native OCaml type construction + field access
================================================================ *)
(* --- Upvalue cells --- *)
let make_upvalue_cell v = let c = { uv_value = v } in UvCell c
let uv_get c = match c with UvCell cell -> cell.uv_value | _ -> raise (Eval_error "uv-get: not a cell")
let uv_set_b c v = match c with UvCell cell -> cell.uv_value <- v | _ -> raise (Eval_error "uv-set!: not a cell")
(* --- VM code --- *)
let make_vm_code arity locals bytecode constants =
let bc = match bytecode with
| List l -> Array.of_list (List.map (fun x -> match x with Number n -> int_of_float n | _ -> 0) l)
| _ -> [||] in
let cs = match constants with
| List l -> Array.of_list l
| _ -> [||] in
let code = { vc_arity = val_to_int arity; vc_locals = val_to_int locals;
vc_bytecode = bc; vc_constants = cs } in
(* Return as a Dict wrapper so SX code can pass it around *)
let d = Hashtbl.create 4 in
Hashtbl.replace d "vc-bytecode" bytecode;
Hashtbl.replace d "vc-constants" constants;
Hashtbl.replace d "vc-arity" arity;
Hashtbl.replace d "vc-locals" locals;
Hashtbl.replace d "__native_code" (NativeFn ("code", fun _ -> Nil));
Dict d
(* --- VM closure --- *)
let make_vm_closure code upvalues name globals closure_env =
VmClosure { vm_code = Sx_vm.code_from_value code;
vm_upvalues = (match upvalues with List l -> Array.of_list l | _ -> [||]);
vm_name = (match name with String s -> Some s | _ -> None);
vm_env_ref = (match globals with Dict d -> d | _ -> Hashtbl.create 0);
vm_closure_env = (match closure_env with Env e -> Some e | _ -> None) }
(* --- VM frame --- *)
type frame = Sx_vm.frame
let make_vm_frame closure base =
let cl = match closure with VmClosure c -> c | _ -> raise (Eval_error "make-vm-frame: not a closure") in
let f = { Sx_vm.closure = cl; ip = 0;
base = val_to_int base;
local_cells = Hashtbl.create 4 } in
(* Wrap as Dict for SX code *)
let d = Hashtbl.create 4 in
Hashtbl.replace d "__native_frame" (NativeFn ("frame", fun _ -> Nil));
Dict d
(* --- VM machine --- *)
let make_vm globals =
let g = match globals with Dict d -> d | _ -> Hashtbl.create 0 in
let vm = Sx_vm.create g in
(* Wrap as Dict for SX code *)
let d = Hashtbl.create 4 in
Hashtbl.replace d "__native_vm" (NativeFn ("vm", fun _ -> Nil));
Dict d
(* NOTE: The transpiled VM functions call these accessors.
For now, the transpiled code delegates to the existing Sx_vm module.
Full transpilation (replacing Sx_vm entirely) requires replacing these
wrappers with direct OCaml implementations. *)
(* --- Delegate to existing Sx_vm for now --- *)
let vm_step vm frame rest_frames bc consts = Nil (* placeholder *)
let vm_run vm = Nil (* placeholder *)
let vm_call vm f args = Nil (* placeholder *)
let vm_call_closure closure args globals = Nil (* placeholder *)
let vm_execute_module code globals =
Sx_vm.execute_module (Sx_vm.code_from_value code)
(match globals with Dict d -> d | _ -> Hashtbl.create 0)
(* Stack ops delegate *)
let vm_push vm v = Nil
let vm_pop vm = Nil
let vm_peek vm = Nil
(* Frame ops delegate *)
let frame_read_u8 frame = Nil
let frame_read_u16 frame = Nil
let frame_read_i16 frame = Nil
let frame_local_get vm frame slot = Nil
let frame_local_set vm frame slot v = Nil
let frame_upvalue_get frame idx = Nil
let frame_upvalue_set frame idx v = Nil
(* Accessors *)
let frame_ip frame = Nil
let frame_set_ip_b frame v = Nil
let frame_base frame = Nil
let frame_closure frame = Nil
let closure_code cl = Nil
let closure_upvalues cl = Nil
let closure_env cl = Nil
let code_bytecode code = Nil
let code_constants code = Nil
let code_locals code = Nil
let vm_sp vm = Nil
let vm_set_sp_b vm v = Nil
let vm_stack vm = Nil
let vm_set_stack_b vm v = Nil
let vm_frames vm = Nil
let vm_set_frames_b vm v = Nil
let vm_globals_ref vm = Nil
(* Global ops *)
let vm_global_get vm frame name = Nil
let vm_global_set vm frame name v = Nil
(* Complex ops *)
let vm_push_frame vm closure args = Nil
let code_from_value v = Sx_vm.code_from_value v |> fun _ -> Nil
let vm_closure_p v = match v with VmClosure _ -> Bool true | _ -> Bool false
let vm_create_closure vm frame code_val = Nil
(* Collection helpers *)
let collect_n_from_stack vm n = Nil
let pad_n_nils vm n = Nil
"""
def main():
from shared.sx.ocaml_sync import OcamlSync
# Load the transpiler into OCaml kernel
bridge = OcamlSync()
transpiler_path = os.path.join(_HERE, "transpiler.sx")
bridge.load(transpiler_path)
# Read vm.sx
vm_path = os.path.join(_PROJECT, "lib", "vm.sx")
with open(vm_path) as f:
src = f.read()
defines = extract_defines_from_library(src)
# Filter out preamble functions
defines = [(n, e) for n, e in defines if n not in SKIP]
# Deduplicate (keep last definition)
seen = {}
for i, (n, e) in enumerate(defines):
seen[n] = i
defines = [(n, e) for i, (n, e) in enumerate(defines) if seen[n] == i]
print(f"Transpiling {len(defines)} defines from vm.sx...", file=sys.stderr)
print(f" Skipped {len(SKIP)} preamble functions", file=sys.stderr)
for name, _ in defines:
print(f" -> {name}", file=sys.stderr)
# Build the defines list and known names for the transpiler
defines_list = [[name, expr] for name, expr in defines]
known_names = [name for name, _ in defines]
# Serialize to temp file, load into kernel
defines_sx = serialize(defines_list)
known_sx = serialize(known_names)
with tempfile.NamedTemporaryFile(mode="w", suffix=".sx", delete=False) as tmp:
tmp.write(f"(define _defines '{defines_sx})\n")
tmp.write(f"(define _known_defines '{known_sx})\n")
tmp_path = tmp.name
try:
bridge.load(tmp_path)
finally:
os.unlink(tmp_path)
# Call ml-translate-file — emits as single let rec block
result = bridge.eval("(ml-translate-file _defines)")
bridge.stop()
output = PREAMBLE + "\n(* === Transpiled from lib/vm.sx === *)\n" + result + "\n"
# Write output
out_path = os.path.join(_HERE, "sx_vm_ref.ml")
with open(out_path, "w") as f:
f.write(output)
print(f"Wrote {len(output)} bytes to {out_path}", file=sys.stderr)
print(f" {len(defines)} functions transpiled", file=sys.stderr)
if __name__ == "__main__":
main()