#!/usr/bin/env python3 """ Bootstrap the SX bytecode VM to native OCaml. Loads the SX-to-OCaml transpiler (transpiler.sx), feeds it the logic functions from lib/vm.sx, and produces sx_vm_ref.ml. Type construction and performance-critical functions stay as native OCaml in the preamble. Logic (opcode dispatch, call routing, execution loop) is transpiled from SX. Usage: python3 hosts/ocaml/bootstrap_vm.py """ from __future__ import annotations import os import sys import tempfile _HERE = os.path.dirname(os.path.abspath(__file__)) _PROJECT = os.path.abspath(os.path.join(_HERE, "..", "..")) sys.path.insert(0, _PROJECT) from shared.sx.parser import parse_all, serialize from shared.sx.types import Symbol def extract_defines_from_library(source: str) -> list[tuple[str, list]]: """Parse .sx source with define-library wrapper, extract defines from begin body.""" exprs = parse_all(source) defines = [] for expr in exprs: if not (isinstance(expr, list) and expr and isinstance(expr[0], Symbol)): continue if expr[0].name == "define": name = expr[1].name if isinstance(expr[1], Symbol) else str(expr[1]) defines.append((name, expr)) elif expr[0].name == "define-library": # Extract defines from (begin ...) declarations for decl in expr[2:]: if isinstance(decl, list) and decl and isinstance(decl[0], Symbol) and decl[0].name == "begin": for form in decl[1:]: if isinstance(form, list) and form and isinstance(form[0], Symbol) and form[0].name == "define": name = form[1].name if isinstance(form[1], Symbol) else str(form[1]) defines.append((name, form)) return defines # Functions provided by the native OCaml preamble — skip from transpilation. # These handle type construction and performance-critical ops. SKIP = { # Type construction "make-upvalue-cell", "uv-get", "uv-set!", "make-vm-code", "make-vm-closure", "make-vm-frame", "make-vm", # Stack ops "vm-push", "vm-pop", "vm-peek", # Frame ops "frame-read-u8", "frame-read-u16", "frame-read-i16", "frame-local-get", "frame-local-set", "frame-upvalue-get", "frame-upvalue-set", # Accessors (native OCaml field access) "frame-ip", "frame-set-ip!", "frame-base", "frame-closure", "closure-code", "closure-upvalues", "closure-env", "code-bytecode", "code-constants", "code-locals", "vm-sp", "vm-set-sp!", "vm-stack", "vm-set-stack!", "vm-frames", "vm-set-frames!", "vm-globals-ref", # Global ops "vm-global-get", "vm-global-set", # Complex native ops "vm-push-frame", "code-from-value", "vm-closure?", "vm-create-closure", # Lambda accessors (native type) "lambda?", "lambda-compiled", "lambda-set-compiled!", "lambda-name", # JIT dispatch + active VM (platform-specific) "*active-vm*", "*jit-compile-fn*", "try-jit-call", "vm-call-closure", # Env access (used by env-walk) "env-walk", "env-walk-set!", # CEK interop "cek-call-or-suspend", # Collection helpers (use mutable state + recursion) "collect-n-from-stack", "collect-n-pairs", "pad-n-nils", } PREAMBLE = """\ (* sx_vm_ref.ml — Auto-generated from lib/vm.sx *) (* Do not edit — regenerate with: python3 hosts/ocaml/bootstrap_vm.py *) [@@@warning "-26-27-39"] open Sx_types open Sx_runtime (* ================================================================ Forward references for CEK interop ================================================================ *) let cek_call = Sx_ref.cek_call let eval_expr = Sx_ref.eval_expr let trampoline v = match v with | Thunk (expr, env) -> Sx_ref.eval_expr expr (Env env) | other -> other (* SX List → OCaml list *) let to_ocaml_list v = match v with List l -> l | Nil -> [] | _ -> [v] (* str as NativeFn value — transpiled code passes it to sx_apply *) let str = NativeFn ("str", fun args -> String (sx_str args)) (* Primitive call dispatch — transpiled code uses this for CALL_PRIM *) let call_primitive name args = let n = value_to_string name in prim_call n (to_ocaml_list args) (* ================================================================ Preamble: 48 native OCaml functions for VM type access. These are SKIPPED from transpilation — the transpiled logic functions call them for all type construction and field access. ================================================================ *) (* --- Unwrap helpers --- *) let unwrap_vm v = match v with VmMachine m -> m | _ -> raise (Eval_error "not a vm") let unwrap_frame v = match v with VmFrame f -> f | _ -> raise (Eval_error "not a frame") let unwrap_closure v = match v with VmClosure c -> c | _ -> raise (Eval_error "not a closure") (* --- Upvalue cells (internal to preamble — never SX values) --- *) let _make_uv_cell v : vm_upvalue_cell = { uv_value = v } let _uv_get (c : vm_upvalue_cell) = c.uv_value let _uv_set (c : vm_upvalue_cell) v = c.uv_value <- v (* SX-facing stubs (in skip set, never called from transpiled code) *) let make_upvalue_cell v = Nil let uv_get _ = Nil let uv_set_b _ _ = Nil (* --- VM code construction --- *) let code_from_value v = Sx_vm.code_from_value v let make_vm_code arity locals bytecode constants = (* Build a Dict that code_from_value can parse *) let d = Hashtbl.create 4 in Hashtbl.replace d "arity" arity; Hashtbl.replace d "bytecode" bytecode; Hashtbl.replace d "constants" constants; Dict d (* --- VM closure --- *) let make_vm_closure code upvalues name globals closure_env = let uv = match upvalues with | List l -> Array.of_list (List.map (fun v -> { uv_value = v }) l) | _ -> [||] in VmClosure { vm_code = code_from_value code; vm_upvalues = uv; vm_name = (match name with String s -> Some s | Nil -> None | _ -> None); vm_env_ref = (match globals with Dict d -> d | _ -> Hashtbl.create 0); vm_closure_env = (match closure_env with Env e -> Some e | _ -> None) } (* --- VM frame --- *) let make_vm_frame closure base = let cl = unwrap_closure closure in VmFrame { vf_closure = cl; vf_ip = 0; vf_base = val_to_int base; vf_local_cells = Hashtbl.create 4 } (* --- VM machine --- *) let make_vm globals = let g = match globals with Dict d -> d | _ -> Hashtbl.create 0 in VmMachine { vm_stack = Array.make 4096 Nil; vm_sp = 0; vm_frames = []; vm_globals = g; vm_pending_cek = None } (* --- Stack ops --- *) let vm_push vm_val v = let m = unwrap_vm vm_val in if m.vm_sp >= Array.length m.vm_stack then begin let ns = Array.make (m.vm_sp * 2) Nil in Array.blit m.vm_stack 0 ns 0 m.vm_sp; m.vm_stack <- ns end; m.vm_stack.(m.vm_sp) <- v; m.vm_sp <- m.vm_sp + 1; Nil let vm_pop vm_val = let m = unwrap_vm vm_val in m.vm_sp <- m.vm_sp - 1; m.vm_stack.(m.vm_sp) let vm_peek vm_val = let m = unwrap_vm vm_val in m.vm_stack.(m.vm_sp - 1) (* --- Frame operand reading --- *) let frame_read_u8 frame_val = let f = unwrap_frame frame_val in let v = f.vf_closure.vm_code.vc_bytecode.(f.vf_ip) in f.vf_ip <- f.vf_ip + 1; Number (float_of_int v) let frame_read_u16 frame_val = let f = unwrap_frame frame_val in let lo = f.vf_closure.vm_code.vc_bytecode.(f.vf_ip) in let hi = f.vf_closure.vm_code.vc_bytecode.(f.vf_ip + 1) in f.vf_ip <- f.vf_ip + 2; Number (float_of_int (lo lor (hi lsl 8))) let frame_read_i16 frame_val = let f = unwrap_frame frame_val in let lo = f.vf_closure.vm_code.vc_bytecode.(f.vf_ip) in let hi = f.vf_closure.vm_code.vc_bytecode.(f.vf_ip + 1) in f.vf_ip <- f.vf_ip + 2; let v = lo lor (hi lsl 8) in Number (float_of_int (if v >= 32768 then v - 65536 else v)) (* --- Local variable access --- *) let frame_local_get vm_val frame_val slot = let m = unwrap_vm vm_val in let f = unwrap_frame frame_val in let idx = f.vf_base + val_to_int slot in (* Check for shared upvalue cell *) match Hashtbl.find_opt f.vf_local_cells (val_to_int slot) with | Some cell -> cell.uv_value | None -> m.vm_stack.(idx) let frame_local_set vm_val frame_val slot v = let m = unwrap_vm vm_val in let f = unwrap_frame frame_val in let s = val_to_int slot in (* If slot has a shared cell, write through cell *) (match Hashtbl.find_opt f.vf_local_cells s with | Some cell -> cell.uv_value <- v | None -> m.vm_stack.(f.vf_base + s) <- v); Nil (* --- Upvalue access --- *) let frame_upvalue_get frame_val idx = let f = unwrap_frame frame_val in f.vf_closure.vm_upvalues.(val_to_int idx).uv_value let frame_upvalue_set frame_val idx v = let f = unwrap_frame frame_val in f.vf_closure.vm_upvalues.(val_to_int idx).uv_value <- v; Nil (* --- Field accessors --- *) let frame_ip f = let fr = unwrap_frame f in Number (float_of_int fr.vf_ip) let frame_set_ip_b f v = let fr = unwrap_frame f in fr.vf_ip <- val_to_int v; Nil let frame_base f = let fr = unwrap_frame f in Number (float_of_int fr.vf_base) let frame_closure f = let fr = unwrap_frame f in VmClosure fr.vf_closure let closure_code cl = let c = unwrap_closure cl in (* Return as Dict for code_bytecode/code_constants/code_locals *) let d = Hashtbl.create 4 in Hashtbl.replace d "vc-bytecode" (List (Array.to_list (Array.map (fun i -> Number (float_of_int i)) c.vm_code.vc_bytecode))); Hashtbl.replace d "vc-constants" (List (Array.to_list c.vm_code.vc_constants)); Hashtbl.replace d "vc-arity" (Number (float_of_int c.vm_code.vc_arity)); Hashtbl.replace d "vc-locals" (Number (float_of_int c.vm_code.vc_locals)); Dict d let closure_upvalues cl = let c = unwrap_closure cl in List (Array.to_list (Array.map (fun cell -> cell.uv_value) c.vm_upvalues)) let closure_env cl = match cl with | VmClosure c -> (match c.vm_closure_env with Some e -> Env e | None -> Nil) | _ -> Nil let code_bytecode code = get_val code (String "vc-bytecode") let code_constants code = get_val code (String "vc-constants") let code_locals code = get_val code (String "vc-locals") let vm_sp v = let m = unwrap_vm v in Number (float_of_int m.vm_sp) let vm_set_sp_b v s = let m = unwrap_vm v in m.vm_sp <- val_to_int s; Nil let vm_stack v = let _m = unwrap_vm v in Nil (* opaque — use vm_push/pop *) let vm_set_stack_b v _s = Nil let vm_frames v = let m = unwrap_vm v in List (List.map (fun f -> VmFrame f) m.vm_frames) let vm_set_frames_b v fs = let m = unwrap_vm v in m.vm_frames <- (match fs with | List l -> List.map unwrap_frame l | _ -> []); Nil let vm_globals_ref v = let m = unwrap_vm v in Dict m.vm_globals (* --- Global variable access --- *) let vm_global_get vm_val frame_val name = let m = unwrap_vm vm_val in let n = value_to_string name in (* Try globals table first *) match Hashtbl.find_opt m.vm_globals n with | Some v -> v | None -> (* Walk closure env chain *) let f = unwrap_frame frame_val in (match f.vf_closure.vm_closure_env with | Some env -> let id = intern n in let rec find_env e = match Hashtbl.find_opt e.bindings id with | Some v -> v | None -> (match e.parent with Some p -> find_env p | None -> (* Try evaluator's primitive table as last resort *) (try prim_call n [] with _ -> raise (Eval_error ("VM undefined: " ^ n)))) in find_env env | None -> (try prim_call n [] with _ -> raise (Eval_error ("VM undefined: " ^ n)))) let vm_global_set vm_val frame_val name v = let m = unwrap_vm vm_val in let n = value_to_string name in let f = unwrap_frame frame_val in (* Write to closure env if name exists there *) let written = match f.vf_closure.vm_closure_env with | Some env -> let id = intern n in let rec find_env e = if Hashtbl.mem e.bindings id then (Hashtbl.replace e.bindings id v; true) else match e.parent with Some p -> find_env p | None -> false in find_env env | None -> false in if not written then begin Hashtbl.replace m.vm_globals n v; (match !_vm_global_set_hook with Some f -> f n v | None -> ()) end; Nil (* --- Frame push --- *) let vm_push_frame vm_val closure_val args = let m = unwrap_vm vm_val in let cl = unwrap_closure closure_val in let f = { vf_closure = cl; vf_ip = 0; vf_base = m.vm_sp; vf_local_cells = Hashtbl.create 4 } in let arg_list = to_ocaml_list args in List.iter (fun a -> m.vm_stack.(m.vm_sp) <- a; m.vm_sp <- m.vm_sp + 1 ) arg_list; (* Pad remaining locals *) for _ = List.length arg_list to cl.vm_code.vc_locals - 1 do m.vm_stack.(m.vm_sp) <- Nil; m.vm_sp <- m.vm_sp + 1 done; m.vm_frames <- f :: m.vm_frames; Nil (* --- Closure type check --- *) let vm_closure_p v = match v with VmClosure _ -> Bool true | _ -> Bool false (* --- Closure creation (upvalue capture) --- *) let vm_create_closure vm_val frame_val code_val = let m = unwrap_vm vm_val in let f = unwrap_frame frame_val in let uv_count = match code_val with | Dict d -> (match Hashtbl.find_opt d "upvalue-count" with | Some (Number n) -> int_of_float n | _ -> 0) | _ -> 0 in let upvalues = Array.init uv_count (fun _ -> let is_local = let v = f.vf_closure.vm_code.vc_bytecode.(f.vf_ip) in f.vf_ip <- f.vf_ip + 1; v in let index = let v = f.vf_closure.vm_code.vc_bytecode.(f.vf_ip) in f.vf_ip <- f.vf_ip + 1; v in if is_local = 1 then begin match Hashtbl.find_opt f.vf_local_cells index with | Some existing -> existing | None -> let c = { uv_value = m.vm_stack.(f.vf_base + index) } in Hashtbl.replace f.vf_local_cells index c; c end else f.vf_closure.vm_upvalues.(index) ) in let code = code_from_value code_val in VmClosure { vm_code = code; vm_upvalues = upvalues; vm_name = None; vm_env_ref = m.vm_globals; vm_closure_env = f.vf_closure.vm_closure_env } (* --- JIT sentinel --- *) let _jit_failed_sentinel = { vm_code = { vc_arity = -1; vc_locals = 0; vc_bytecode = [||]; vc_constants = [||] }; vm_upvalues = [||]; vm_name = Some "__jit_failed__"; vm_env_ref = Hashtbl.create 0; vm_closure_env = None } let _is_jit_failed cl = cl.vm_code.vc_arity = -1 (* --- Lambda accessors --- *) let is_lambda v = match v with Lambda _ -> Bool true | _ -> Bool false let lambda_compiled v = match v with | Lambda l -> (match l.l_compiled with Some c -> VmClosure c | None -> Nil) | _ -> Nil let lambda_set_compiled_b v c = match v with | Lambda l -> (match c with | VmClosure cl -> l.l_compiled <- Some cl; Nil | String "jit-failed" -> l.l_compiled <- Some _jit_failed_sentinel; Nil | _ -> l.l_compiled <- None; Nil) | _ -> Nil let lambda_name v = match v with | Lambda l -> (match l.l_name with Some n -> String n | None -> Nil) | _ -> Nil (* --- CEK call with suspension awareness --- *) let cek_call_or_suspend vm_val f args = let a = to_ocaml_list args in let state = Sx_ref.continue_with_call f (List a) (Env (Sx_types.make_env ())) (List a) (List []) in let final = Sx_ref.cek_step_loop state in match get_val final (String "phase") with | String "io-suspended" -> let m = unwrap_vm vm_val in m.vm_pending_cek <- Some final; raise (Sx_vm.VmSuspended (get_val final (String "request"), Sx_vm.create m.vm_globals)) | _ -> Sx_ref.cek_value final (* --- Env walking (for global variable resolution) --- *) let rec env_walk env name = match env with | Env e -> let id = intern (value_to_string name) in let rec find e = match Hashtbl.find_opt e.bindings id with | Some v -> v | None -> (match e.parent with Some p -> find p | None -> Nil) in find e | Nil -> Nil | _ -> Nil let env_walk_set_b env name value = match env with | Env e -> let id = intern (value_to_string name) in let rec find e = if Hashtbl.mem e.bindings id then (Hashtbl.replace e.bindings id value; true) else match e.parent with Some p -> find p | None -> false in if find e then Nil else Nil | _ -> Nil (* --- Active VM tracking (module-level mutable state) --- *) let _active_vm : vm_machine option ref = ref None (* Forward ref — resolved after transpiled let rec block *) let _vm_run_fn : (value -> value) ref = ref (fun _ -> Nil) let _vm_call_fn : (value -> value -> value -> value) ref = ref (fun _ _ _ -> Nil) (* vm-call-closure: creates fresh VM, runs closure, returns result *) let vm_call_closure closure_val args globals = let cl = unwrap_closure closure_val in let prev_vm = !_active_vm in let g = match globals with Dict d -> d | _ -> Hashtbl.create 0 in let m = { vm_stack = Array.make 4096 Nil; vm_sp = 0; vm_frames = []; vm_globals = g; vm_pending_cek = None } in let vm_val = VmMachine m in _active_vm := Some m; ignore (vm_push_frame vm_val closure_val args); (try ignore (!_vm_run_fn vm_val) with e -> _active_vm := prev_vm; raise e); _active_vm := prev_vm; vm_pop vm_val (* --- JIT dispatch (platform-specific) --- *) let try_jit_call vm_val f args = let m = unwrap_vm vm_val in match f with | Lambda l -> (match l.l_compiled with | Some cl when not (_is_jit_failed cl) -> (try vm_push vm_val (vm_call_closure (VmClosure cl) args (Dict cl.vm_env_ref)) with _ -> vm_push vm_val (cek_call_or_suspend vm_val f args)) | Some _ -> vm_push vm_val (cek_call_or_suspend vm_val f args) | None -> if l.l_name <> None then begin l.l_compiled <- Some _jit_failed_sentinel; match !Sx_vm.jit_compile_ref l m.vm_globals with | Some cl -> l.l_compiled <- Some cl; (try vm_push vm_val (vm_call_closure (VmClosure cl) args (Dict cl.vm_env_ref)) with _ -> vm_push vm_val (cek_call_or_suspend vm_val f args)) | None -> vm_push vm_val (cek_call_or_suspend vm_val f args) end else vm_push vm_val (cek_call_or_suspend vm_val f args)) | _ -> vm_push vm_val (cek_call_or_suspend vm_val f args) (* --- Collection helpers --- *) let collect_n_from_stack vm_val n = let m = unwrap_vm vm_val in let count = val_to_int n in let result = ref [] in for _ = 1 to count do m.vm_sp <- m.vm_sp - 1; result := m.vm_stack.(m.vm_sp) :: !result done; List !result let collect_n_pairs vm_val n = let m = unwrap_vm vm_val in let count = val_to_int n in let d = Hashtbl.create count in for _ = 1 to count do m.vm_sp <- m.vm_sp - 1; let v = m.vm_stack.(m.vm_sp) in m.vm_sp <- m.vm_sp - 1; let k = value_to_string m.vm_stack.(m.vm_sp) in Hashtbl.replace d k v done; Dict d let pad_n_nils vm_val n = let m = unwrap_vm vm_val in let count = val_to_int n in for _ = 1 to count do m.vm_stack.(m.vm_sp) <- Nil; m.vm_sp <- m.vm_sp + 1 done; Nil """ def main(): from shared.sx.ocaml_sync import OcamlSync # Load the transpiler into OCaml kernel bridge = OcamlSync() transpiler_path = os.path.join(_HERE, "transpiler.sx") bridge.load(transpiler_path) # Read vm.sx vm_path = os.path.join(_PROJECT, "lib", "vm.sx") with open(vm_path) as f: src = f.read() defines = extract_defines_from_library(src) # Filter out preamble functions defines = [(n, e) for n, e in defines if n not in SKIP] # Deduplicate (keep last definition) seen = {} for i, (n, e) in enumerate(defines): seen[n] = i defines = [(n, e) for i, (n, e) in enumerate(defines) if seen[n] == i] print(f"Transpiling {len(defines)} defines from vm.sx...", file=sys.stderr) print(f" Skipped {len(SKIP)} preamble functions", file=sys.stderr) for name, _ in defines: print(f" -> {name}", file=sys.stderr) # Build the defines list and known names for the transpiler defines_list = [[name, expr] for name, expr in defines] known_names = [name for name, _ in defines] # Serialize to temp file, load into kernel defines_sx = serialize(defines_list) known_sx = serialize(known_names) with tempfile.NamedTemporaryFile(mode="w", suffix=".sx", delete=False) as tmp: tmp.write(f"(define _defines '{defines_sx})\n") tmp.write(f"(define _known_defines '{known_sx})\n") tmp_path = tmp.name try: bridge.load(tmp_path) finally: os.unlink(tmp_path) # Call ml-translate-file — emits as single let rec block result = bridge.eval("(ml-translate-file _defines)") bridge.stop() fixups = """ (* Wire forward references to transpiled functions *) let () = _vm_run_fn := vm_run let () = _vm_call_fn := vm_call (* ================================================================ Public API — matches Sx_vm interface for drop-in replacement ================================================================ *) (** Execute a compiled module — entry point for load-sxbc, compile-blob. *) let execute_module (code : vm_code) (globals : (string, value) Hashtbl.t) = let cl = { vm_code = code; vm_upvalues = [||]; vm_name = Some "module"; vm_env_ref = globals; vm_closure_env = None } in let m = { vm_stack = Array.make 4096 Nil; vm_sp = 0; vm_frames = []; vm_globals = globals; vm_pending_cek = None } in let vm_val = VmMachine m in let frame = { vf_closure = cl; vf_ip = 0; vf_base = 0; vf_local_cells = Hashtbl.create 4 } in for _ = 0 to code.vc_locals - 1 do m.vm_stack.(m.vm_sp) <- Nil; m.vm_sp <- m.vm_sp + 1 done; m.vm_frames <- [frame]; ignore (vm_run vm_val); vm_pop vm_val (** Execute a closure with args — entry point for JIT Lambda calls. *) let call_closure (cl : vm_closure) (args : value list) (globals : (string, value) Hashtbl.t) = vm_call_closure (VmClosure cl) (List args) (Dict globals) (** Reexport code_from_value for callers *) let code_from_value = code_from_value (** Reexport jit refs *) let jit_compile_ref = Sx_vm.jit_compile_ref let jit_failed_sentinel = _jit_failed_sentinel let is_jit_failed = _is_jit_failed """ output = PREAMBLE + "\n(* === Transpiled from lib/vm.sx === *)\n" + result + "\n" + fixups # Write output out_path = os.path.join(_HERE, "sx_vm_ref.ml") with open(out_path, "w") as f: f.write(output) print(f"Wrote {len(output)} bytes to {out_path}", file=sys.stderr) print(f" {len(defines)} functions transpiled", file=sys.stderr) if __name__ == "__main__": main()