Files
rose-ash/hosts/ocaml/lib/sx_vm.ml
giles b4107fa52b Restore JIT closure injection — needed for top-level function JIT
Removing injection broke GLOBAL_GET for all JIT-compiled functions,
not just mutable closures. Top-level functions like render-to-html
need their referenced bindings in the VM globals table.

Restore the original injection (only injects values not already in
globals). Mutable closure vars (parser's pos etc.) still get stale
snapshots and fall back to CEK — that's the known limitation to fix
with cell-based boxing later.

1166 passed, 0 failed.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-29 21:19:35 +00:00

950 lines
42 KiB
OCaml

(** SX bytecode VM — stack-based interpreter.
Executes bytecode produced by compiler.sx.
Designed for speed: array-based stack, direct dispatch,
no allocation per step (unlike the CEK machine).
This is the platform-native execution engine. The same bytecode
runs on all platforms (OCaml, JS, WASM).
VM types (vm_code, vm_upvalue_cell, vm_closure) are defined in
sx_types.ml to share the mutual recursion block with [value]. *)
open Sx_types
(** Call frame — one per function invocation. *)
type frame = {
closure : vm_closure;
mutable ip : int;
base : int; (* base index in value stack for locals *)
local_cells : (int, vm_upvalue_cell) Hashtbl.t; (* slot → shared cell for captured locals *)
}
(** VM state. *)
type vm = {
mutable stack : value array;
mutable sp : int;
mutable frames : frame list;
globals : (string, value) Hashtbl.t; (* live reference to kernel env *)
}
(** Forward reference for JIT compilation — set after definition. *)
let jit_compile_ref : (lambda -> (string, value) Hashtbl.t -> vm_closure option) ref =
ref (fun _ _ -> None)
(** Sentinel closure indicating JIT compilation was attempted and failed.
Prevents retrying compilation on every call. *)
let jit_failed_sentinel = {
vm_code = { vc_arity = -1; vc_locals = 0; vc_bytecode = [||]; vc_constants = [||] };
vm_upvalues = [||]; vm_name = Some "__jit_failed__"; vm_env_ref = Hashtbl.create 0; vm_closure_env = None
}
let is_jit_failed cl = cl.vm_code.vc_arity = -1
(** Current active VM — allows HO primitives (map, filter, for-each, some)
to call VmClosure callbacks on the same VM instead of creating a new one.
This is critical: creating a new VM per callback loses the calling VM's
stack/frame context, causing upvalue-captured host objects to become
inaccessible. *)
let _active_vm : vm option ref = ref None
let create globals =
{ stack = Array.make 4096 Nil; sp = 0; frames = []; globals }
(** Stack ops — inlined for speed. *)
let push vm v =
if vm.sp >= Array.length vm.stack then begin
let ns = Array.make (vm.sp * 2) Nil in
Array.blit vm.stack 0 ns 0 vm.sp;
vm.stack <- ns
end;
vm.stack.(vm.sp) <- v;
vm.sp <- vm.sp + 1
let[@inline] pop vm =
vm.sp <- vm.sp - 1;
vm.stack.(vm.sp)
let[@inline] peek vm = vm.stack.(vm.sp - 1)
(** Read operands. *)
let[@inline] read_u8 f =
let v = f.closure.vm_code.vc_bytecode.(f.ip) in
f.ip <- f.ip + 1; v
let[@inline] read_u16 f =
let lo = f.closure.vm_code.vc_bytecode.(f.ip) in
let hi = f.closure.vm_code.vc_bytecode.(f.ip + 1) in
f.ip <- f.ip + 2;
lo lor (hi lsl 8)
let[@inline] read_i16 f =
let v = read_u16 f in
if v >= 32768 then v - 65536 else v
(** Wrap a VM closure as an SX value (NativeFn). *)
let closure_to_value cl =
NativeFn ("vm:" ^ (match cl.vm_name with Some n -> n | None -> "anon"),
fun args -> raise (Eval_error ("VM_CLOSURE_CALL:" ^ String.concat "," (List.map Sx_runtime.value_to_str args))))
(* Placeholder — actual calls go through vm_call below *)
let _vm_insn_count = ref 0
let _vm_call_count = ref 0
let _vm_cek_count = ref 0
let vm_reset_counters () = _vm_insn_count := 0; _vm_call_count := 0; _vm_cek_count := 0
let vm_report_counters () =
Printf.eprintf "[vm-perf] insns=%d calls=%d cek_fallbacks=%d\n%!"
!_vm_insn_count !_vm_call_count !_vm_cek_count
(** Push a VM closure frame onto the current VM — no new VM allocation.
This is the fast path for intra-VM closure calls. *)
let push_closure_frame vm cl args =
let frame = { closure = cl; ip = 0; base = vm.sp; local_cells = Hashtbl.create 4 } in
List.iter (fun a -> push vm a) args;
for _ = List.length args to cl.vm_code.vc_locals - 1 do push vm Nil done;
vm.frames <- frame :: vm.frames
(** Convert compiler output (SX dict) to a vm_code object. *)
let code_from_value v =
match v with
| Dict d ->
let bc_list = match Hashtbl.find_opt d "bytecode" with
| Some (List l | ListRef { contents = l }) ->
Array.of_list (List.map (fun x -> match x with Number n -> int_of_float n | _ -> 0) l)
| _ -> [||]
in
let entries = match Hashtbl.find_opt d "constants" with
| Some (List l | ListRef { contents = l }) -> Array.of_list l
| _ -> [||]
in
let constants = Array.map (fun entry ->
match entry with
| Dict ed when Hashtbl.mem ed "bytecode" -> entry (* nested code — convert lazily *)
| _ -> entry
) entries in
let arity = match Hashtbl.find_opt d "arity" with
| Some (Number n) -> int_of_float n | _ -> 0
in
{ vc_arity = arity; vc_locals = arity + 16; vc_bytecode = bc_list; vc_constants = constants }
| _ -> { vc_arity = 0; vc_locals = 16; vc_bytecode = [||]; vc_constants = [||] }
(** Execute a closure with arguments — creates a fresh VM.
Used for entry points: JIT Lambda calls, module execution, cross-boundary. *)
let rec call_closure cl args globals =
incr _vm_call_count;
let prev_vm = !_active_vm in
let vm = create globals in
_active_vm := Some vm;
push_closure_frame vm cl args;
(try run vm with e -> _active_vm := prev_vm; raise e);
_active_vm := prev_vm;
pop vm
(** Call a VmClosure on the active VM if one exists, otherwise create a new one.
This is the path used by HO primitives (map, filter, for-each, some) so
callbacks can access upvalues that reference the calling VM's state. *)
and call_closure_reuse cl args =
call_closure cl args cl.vm_env_ref
(** Call a value as a function — dispatch by type.
VmClosure: pushes frame on current VM (fast intra-VM path).
Lambda: tries JIT then falls back to CEK.
NativeFn: calls directly. *)
and vm_call vm f args =
match f with
| VmClosure cl ->
(* Fast path: push frame on current VM — no allocation, enables TCO *)
push_closure_frame vm cl args
| NativeFn (_name, fn) ->
let result = fn args in
push vm result
| Lambda l ->
(match l.l_compiled with
| Some cl when not (is_jit_failed cl) ->
(* Cached bytecode — run on VM using the closure's captured env,
not the caller's globals. Closure vars were merged at compile time. *)
(try push vm (call_closure cl args cl.vm_env_ref)
with _e ->
(* Fallback to CEK — data-dependent error, not a JIT bug.
Dedup logging happens in register_jit_hook. *)
push vm (Sx_ref.cek_call f (List args)))
| Some _ ->
(* Compile failed — CEK *)
push vm (Sx_ref.cek_call f (List args))
| None ->
if l.l_name <> None
then begin
(* Pre-mark before compile attempt to prevent re-entrancy *)
l.l_compiled <- Some jit_failed_sentinel;
match !jit_compile_ref l vm.globals with
| Some cl ->
l.l_compiled <- Some cl;
(try push vm (call_closure cl args cl.vm_env_ref)
with _e -> push vm (Sx_ref.cek_call f (List args)))
| None ->
push vm (Sx_ref.cek_call f (List args))
end
else
push vm (Sx_ref.cek_call f (List args)))
| Component _ | Island _ ->
(* Components use keyword-arg parsing — CEK handles this *)
incr _vm_cek_count;
let result = Sx_ref.cek_call f (List args) in
push vm result
| _ ->
raise (Eval_error ("VM: not callable: " ^ Sx_runtime.value_to_str f))
(** Main execution loop — iterative (no OCaml stack growth).
VmClosure calls push frames; the loop picks them up.
OP_TAIL_CALL + VmClosure = true TCO: drop frame, push new, loop. *)
and run vm =
while vm.frames <> [] do
match vm.frames with
| [] -> () (* guard handled by while condition *)
| frame :: rest_frames ->
let bc = frame.closure.vm_code.vc_bytecode in
let consts = frame.closure.vm_code.vc_constants in
if frame.ip >= Array.length bc then begin
(* Bytecode exhausted without explicit RETURN — pop frame like RETURN *)
let fn_name = match frame.closure.vm_name with Some n -> n | None -> "?" in
Printf.eprintf "[vm] WARN: bytecode exhausted without RETURN in %s (base=%d sp=%d frames=%d)\n%!"
fn_name frame.base vm.sp (List.length rest_frames);
let result = if vm.sp > frame.base then pop vm else Nil in
vm.frames <- rest_frames;
vm.sp <- frame.base;
if rest_frames <> [] then push vm result
(* If no more frames, result stays on stack for call_closure to pop *)
end
else begin
let saved_ip = frame.ip in
let op = bc.(frame.ip) in
frame.ip <- frame.ip + 1;
(try match op with
(* ---- Constants ---- *)
| 1 (* OP_CONST *) ->
let idx = read_u16 frame in
if idx >= Array.length consts then
raise (Eval_error (Printf.sprintf "VM: CONST index %d out of bounds (pool size %d)"
idx (Array.length consts)));
push vm consts.(idx)
| 2 (* OP_NIL *) -> push vm Nil
| 3 (* OP_TRUE *) -> push vm (Bool true)
| 4 (* OP_FALSE *) -> push vm (Bool false)
| 5 (* OP_POP *) -> ignore (pop vm)
| 6 (* OP_DUP *) -> push vm (peek vm)
(* ---- Variable access ---- *)
| 16 (* OP_LOCAL_GET *) ->
let slot = read_u8 frame in
let v = match Hashtbl.find_opt frame.local_cells slot with
| Some cell -> cell.uv_value
| None ->
let idx = frame.base + slot in
if idx >= vm.sp then
raise (Eval_error (Printf.sprintf
"VM: LOCAL_GET slot=%d base=%d sp=%d out of bounds" slot frame.base vm.sp));
vm.stack.(idx)
in
push vm v
| 17 (* OP_LOCAL_SET *) ->
let slot = read_u8 frame in
let v = peek vm in
(* Write to shared cell if captured, else to stack *)
(match Hashtbl.find_opt frame.local_cells slot with
| Some cell -> cell.uv_value <- v
| None -> vm.stack.(frame.base + slot) <- v)
| 18 (* OP_UPVALUE_GET *) ->
let idx = read_u8 frame in
if idx >= Array.length frame.closure.vm_upvalues then
raise (Eval_error (Printf.sprintf
"VM: UPVALUE_GET idx=%d out of bounds (have %d)" idx
(Array.length frame.closure.vm_upvalues)));
push vm frame.closure.vm_upvalues.(idx).uv_value
| 19 (* OP_UPVALUE_SET *) ->
let idx = read_u8 frame in
frame.closure.vm_upvalues.(idx).uv_value <- peek vm
| 20 (* OP_GLOBAL_GET *) ->
let idx = read_u16 frame in
let name = match consts.(idx) with String s -> s | _ -> "" in
let v = try Hashtbl.find vm.globals name with Not_found ->
(* Walk the closure env chain for inner functions *)
let id = Sx_types.intern name in
let rec env_lookup e =
try Hashtbl.find e.bindings id
with Not_found ->
match e.parent with Some p -> env_lookup p | None ->
try Sx_primitives.get_primitive name
with _ -> raise (Eval_error ("VM undefined: " ^ name))
in
match frame.closure.vm_closure_env with
| Some env -> env_lookup env
| None ->
try Sx_primitives.get_primitive name
with _ -> raise (Eval_error ("VM undefined: " ^ name))
in
push vm v
| 21 (* OP_GLOBAL_SET *) ->
let idx = read_u16 frame in
let name = match consts.(idx) with String s -> s | _ -> "" in
(* Write to closure env if the name exists there (mutable closure vars) *)
let written = match frame.closure.vm_closure_env with
| Some env ->
let id = Sx_types.intern name in
let rec find_env e =
if Hashtbl.mem e.bindings id then
(Hashtbl.replace e.bindings id (peek vm); true)
else match e.parent with Some p -> find_env p | None -> false
in find_env env
| None -> false
in
if not written then begin
let v = peek vm in
Hashtbl.replace vm.globals name v;
(match !Sx_types._vm_global_set_hook with Some f -> f name v | None -> ())
end
(* ---- Control flow ---- *)
| 32 (* OP_JUMP *) ->
let offset = read_i16 frame in
frame.ip <- frame.ip + offset
| 33 (* OP_JUMP_IF_FALSE *) ->
let offset = read_i16 frame in
let v = pop vm in
if not (sx_truthy v) then frame.ip <- frame.ip + offset
| 34 (* OP_JUMP_IF_TRUE *) ->
let offset = read_i16 frame in
let v = pop vm in
if sx_truthy v then frame.ip <- frame.ip + offset
(* ---- Function calls ---- *)
| 48 (* OP_CALL *) ->
let argc = read_u8 frame in
let args = Array.init argc (fun _ -> pop vm) in
let f = pop vm in
let args_list = List.rev (Array.to_list args) in
vm_call vm f args_list
(* Loop continues — if VmClosure, new frame runs next iteration *)
| 49 (* OP_TAIL_CALL *) ->
let argc = read_u8 frame in
let args = Array.init argc (fun _ -> pop vm) in
let f = pop vm in
let args_list = List.rev (Array.to_list args) in
(* Drop current frame, reuse stack space — true TCO for VmClosure *)
vm.frames <- rest_frames;
vm.sp <- frame.base;
vm_call vm f args_list
| 50 (* OP_RETURN *) ->
let result = pop vm in
vm.frames <- rest_frames;
vm.sp <- frame.base;
push vm result
(* Loop continues with caller frame *)
| 51 (* OP_CLOSURE *) ->
let idx = read_u16 frame in
if idx >= Array.length consts then
raise (Eval_error (Printf.sprintf "VM: CLOSURE idx %d >= consts %d" idx (Array.length consts)));
let code_val = consts.(idx) in
let code = code_from_value code_val in
(* Read upvalue descriptors from bytecode *)
let uv_count = match code_val with
| Dict d -> (match Hashtbl.find_opt d "upvalue-count" with
| Some (Number n) -> int_of_float n | _ -> 0)
| _ -> 0
in
let upvalues = Array.init uv_count (fun _ ->
let is_local = read_u8 frame in
let index = read_u8 frame in
if is_local = 1 then begin
(* Capture from enclosing frame's local slot.
Create a shared cell — both parent and closure
read/write through this cell. *)
let cell = match Hashtbl.find_opt frame.local_cells index with
| Some existing -> existing (* reuse existing cell *)
| None ->
let c = { uv_value = vm.stack.(frame.base + index) } in
Hashtbl.replace frame.local_cells index c;
c
in
cell
end else
(* Capture from enclosing frame's upvalue — already a shared cell *)
frame.closure.vm_upvalues.(index)
) in
let cl = { vm_code = code; vm_upvalues = upvalues; vm_name = None;
vm_env_ref = vm.globals; vm_closure_env = None } in
push vm (VmClosure cl)
| 52 (* OP_CALL_PRIM *) ->
let idx = read_u16 frame in
let argc = read_u8 frame in
let name = match consts.(idx) with String s -> s | _ -> "" in
let args = List.init argc (fun _ -> pop vm) |> List.rev in
(* Resolve thunks — the CEK evaluator does this automatically
via trampoline, but the VM must do it explicitly before
passing args to primitives. *)
let args = List.map (fun v ->
match v with
| Thunk _ -> !Sx_primitives._sx_trampoline_fn v
| _ -> v) args in
let result =
try
(* Check primitives FIRST (native implementations of map/filter/etc.),
then globals (which may have ho_via_cek wrappers that route
through the CEK — these can't call VM closures). *)
let fn_val = try Sx_primitives.get_primitive name with _ ->
try Hashtbl.find vm.globals name with Not_found ->
raise (Eval_error ("VM: unknown primitive " ^ name))
in
(match fn_val with
| NativeFn (_, fn) -> fn args
| _ -> Nil)
with Eval_error msg ->
raise (Eval_error (Printf.sprintf "%s (in CALL_PRIM \"%s\" with %d args)"
msg name argc))
in
push vm result
(* ---- Collections ---- *)
| 64 (* OP_LIST *) ->
let count = read_u16 frame in
let items = List.init count (fun _ -> pop vm) |> List.rev in
push vm (List items)
| 65 (* OP_DICT *) ->
let count = read_u16 frame in
let d = Hashtbl.create count in
for _ = 1 to count do
let v = pop vm in
let k = pop vm in
let key = match k with String s -> s | Keyword s -> s | _ -> Sx_runtime.value_to_str k in
Hashtbl.replace d key v
done;
push vm (Dict d)
(* ---- String ops ---- *)
| 144 (* OP_STR_CONCAT *) ->
let count = read_u8 frame in
let parts = List.init count (fun _ -> pop vm) |> List.rev in
let s = String.concat "" (List.map Sx_runtime.value_to_str parts) in
push vm (String s)
(* ---- Define ---- *)
| 128 (* OP_DEFINE *) ->
let idx = read_u16 frame in
let name = match consts.(idx) with String s -> s | _ -> "" in
let v = peek vm in
Hashtbl.replace vm.globals name v
(* ---- Inline primitives ----
Fast path for common types; fallback to actual primitive
for edge cases (type coercion, thunks, RawHTML, etc.)
to guarantee behavioral parity with CALL_PRIM. *)
| 160 (* OP_ADD *) ->
let b = pop vm and a = pop vm in
push vm (match a, b with
| Number x, Number y -> Number (x +. y)
| _ -> (Hashtbl.find Sx_primitives.primitives "+") [a; b])
| 161 (* OP_SUB *) ->
let b = pop vm and a = pop vm in
push vm (match a, b with
| Number x, Number y -> Number (x -. y)
| _ -> (Hashtbl.find Sx_primitives.primitives "-") [a; b])
| 162 (* OP_MUL *) ->
let b = pop vm and a = pop vm in
push vm (match a, b with
| Number x, Number y -> Number (x *. y)
| _ -> (Hashtbl.find Sx_primitives.primitives "*") [a; b])
| 163 (* OP_DIV *) ->
let b = pop vm and a = pop vm in
push vm (match a, b with
| Number x, Number y -> Number (x /. y)
| _ -> (Hashtbl.find Sx_primitives.primitives "/") [a; b])
| 164 (* OP_EQ *) ->
let b = pop vm and a = pop vm in
let rec norm = function
| ListRef { contents = l } -> List (List.map norm l)
| List l -> List (List.map norm l) | v -> v in
push vm (Bool (norm a = norm b))
| 165 (* OP_LT *) ->
let b = pop vm and a = pop vm in
push vm (match a, b with
| Number x, Number y -> Bool (x < y)
| String x, String y -> Bool (x < y)
| _ -> (Hashtbl.find Sx_primitives.primitives "<") [a; b])
| 166 (* OP_GT *) ->
let b = pop vm and a = pop vm in
push vm (match a, b with
| Number x, Number y -> Bool (x > y)
| String x, String y -> Bool (x > y)
| _ -> (Hashtbl.find Sx_primitives.primitives ">") [a; b])
| 167 (* OP_NOT *) ->
let v = pop vm in
push vm (Bool (not (sx_truthy v)))
| 168 (* OP_LEN *) ->
let v = pop vm in
push vm (match v with
| List l | ListRef { contents = l } -> Number (float_of_int (List.length l))
| String s -> Number (float_of_int (String.length s))
| Dict d -> Number (float_of_int (Hashtbl.length d))
| Nil -> Number 0.0
| _ -> (Hashtbl.find Sx_primitives.primitives "len") [v])
| 169 (* OP_FIRST *) ->
let v = pop vm in
push vm (match v with
| List (x :: _) | ListRef { contents = x :: _ } -> x
| List [] | ListRef { contents = [] } | Nil -> Nil
| _ -> (Hashtbl.find Sx_primitives.primitives "first") [v])
| 170 (* OP_REST *) ->
let v = pop vm in
push vm (match v with
| List (_ :: xs) | ListRef { contents = _ :: xs } -> List xs
| List [] | ListRef { contents = [] } | Nil -> List []
| _ -> (Hashtbl.find Sx_primitives.primitives "rest") [v])
| 171 (* OP_NTH *) ->
let n = pop vm and coll = pop vm in
push vm (match coll, n with
| (List l | ListRef { contents = l }), Number f ->
(try List.nth l (int_of_float f) with _ -> Nil)
| String s, Number f ->
let i = int_of_float f in
if i >= 0 && i < String.length s then String (String.make 1 s.[i])
else Nil
| _ -> (Hashtbl.find Sx_primitives.primitives "nth") [coll; n])
| 172 (* OP_CONS *) ->
let coll = pop vm and x = pop vm in
push vm (match coll with
| List l -> List (x :: l)
| ListRef { contents = l } -> List (x :: l)
| Nil -> List [x]
| _ -> (Hashtbl.find Sx_primitives.primitives "cons") [x; coll])
| 173 (* OP_NEG *) ->
let v = pop vm in
push vm (match v with
| Number x -> Number (-.x)
| _ -> (Hashtbl.find Sx_primitives.primitives "-") [v])
| 174 (* OP_INC *) ->
let v = pop vm in
push vm (match v with
| Number x -> Number (x +. 1.0)
| _ -> (Hashtbl.find Sx_primitives.primitives "inc") [v])
| 175 (* OP_DEC *) ->
let v = pop vm in
push vm (match v with
| Number x -> Number (x -. 1.0)
| _ -> (Hashtbl.find Sx_primitives.primitives "dec") [v])
| opcode ->
raise (Eval_error (Printf.sprintf "VM: unknown opcode %d at ip=%d"
opcode (frame.ip - 1)))
with Invalid_argument msg ->
let fn_name = match frame.closure.vm_name with Some n -> n | None -> "?" in
raise (Eval_error (Printf.sprintf
"VM: %s at ip=%d op=%d in %s (base=%d sp=%d bc_len=%d consts=%d)"
msg saved_ip op fn_name frame.base vm.sp
(Array.length bc) (Array.length consts))))
end
done
(** Execute a compiled module (top-level bytecode). *)
let execute_module code globals =
let cl = { vm_code = code; vm_upvalues = [||]; vm_name = Some "module"; vm_env_ref = globals; vm_closure_env = None } in
let vm = create globals in
let frame = { closure = cl; ip = 0; base = 0; local_cells = Hashtbl.create 4 } in
for _ = 0 to code.vc_locals - 1 do push vm Nil done;
vm.frames <- [frame];
run vm;
pop vm
(** {1 Lazy JIT compilation} *)
(** Compile a lambda or component body to bytecode using the SX compiler.
Invokes [compile] from spec/compiler.sx via the CEK machine.
Returns a [vm_closure] ready for execution, or [None] on failure
(safe fallback to CEK interpretation).
The compilation cost is a single CEK evaluation of the compiler —
microseconds per function. The result is cached in the lambda/component
record so subsequent calls go straight to the VM. *)
let jit_compile_lambda (l : lambda) globals =
let fn_name = match l.l_name with Some n -> n | None -> "<anon>" in
try
let compile_fn = try Hashtbl.find globals "compile"
with Not_found -> raise (Eval_error "JIT: compiler not loaded") in
(* Reconstruct the (fn (params) body) form so the compiler produces
a proper closure. l.l_body is the inner body; we need the full
function form with params so the compiled code binds them. *)
let param_syms = List (List.map (fun s -> Symbol s) l.l_params) in
let fn_expr = List [Symbol "fn"; param_syms; l.l_body] in
let quoted = List [Symbol "quote"; fn_expr] in
let result = match compile_fn with
| VmClosure cl ->
(* Compiler loaded as bytecode — call through VM directly *)
call_closure cl [quoted] globals
| _ ->
(* Compiler loaded from source — call through CEK *)
Sx_ref.eval_expr (List [compile_fn; quoted]) (Env (make_env ())) in
(* Inject closure bindings into globals so GLOBAL_GET can find them.
Only injects values not already present in globals (preserves
existing defines). Mutable closure vars get stale snapshots here
but GLOBAL_SET writes back to vm_closure_env, and GLOBAL_GET
falls through to vm_closure_env if the global is stale. *)
let effective_globals =
let closure = l.l_closure in
let count = ref 0 in
let rec inject env =
Hashtbl.iter (fun id v ->
let name = Sx_types.unintern id in
if not (Hashtbl.mem globals name) then begin
Hashtbl.replace globals name v;
incr count
end
) env.bindings;
match env.parent with Some p -> inject p | None -> ()
in
if Hashtbl.length closure.bindings > 0 || closure.parent <> None then
inject closure;
if !count > 0 then
Printf.eprintf "[jit] %s: injected %d closure bindings\n%!" fn_name !count;
globals
in
(match result with
| Dict d when Hashtbl.mem d "bytecode" ->
let outer_code = code_from_value result in
let bc = outer_code.vc_bytecode in
if Array.length bc >= 4 && bc.(0) = 51 (* OP_CLOSURE *) then begin
let idx = bc.(1) lor (bc.(2) lsl 8) in
if idx < Array.length outer_code.vc_constants then
let inner_val = outer_code.vc_constants.(idx) in
let code = code_from_value inner_val in
Some { vm_code = code; vm_upvalues = [||];
vm_name = l.l_name; vm_env_ref = effective_globals; vm_closure_env = Some l.l_closure }
else begin
Printf.eprintf "[jit] FAIL %s: closure index %d out of bounds (pool=%d)\n%!"
fn_name idx (Array.length outer_code.vc_constants);
None
end
end else begin
(* Not a closure — constant expression, alias, or simple computation.
Execute the bytecode as a module to get the value, then wrap
as a NativeFn if it's callable (so the CEK can dispatch to it). *)
(try
let value = execute_module outer_code globals in
ignore (fn_name, value, bc); (* resolved — not a closure, CEK handles it *)
(* If the resolved value is a NativeFn, we can't wrap it as a
vm_closure — just let the CEK handle it directly. Return None
so the lambda falls through to CEK, which will find the
resolved value in the env on next lookup. *)
None
with _ ->
ignore fn_name; (* non-closure, execution failed — CEK fallback *)
None)
end
| _ ->
Printf.eprintf "[jit] FAIL %s: compiler returned %s\n%!" fn_name (type_of result);
None)
with e ->
Printf.eprintf "[jit] FAIL %s: %s\n%!" fn_name (Printexc.to_string e);
None
(* Wire up forward references *)
let () = jit_compile_ref := jit_compile_lambda
let () = _vm_call_closure_ref := (fun cl args -> call_closure cl args cl.vm_env_ref)
(** {1 Debugging / introspection} *)
(** Map opcode integer to human-readable name. *)
let opcode_name = function
| 1 -> "CONST" | 2 -> "NIL" | 3 -> "TRUE" | 4 -> "FALSE"
| 5 -> "POP" | 6 -> "DUP"
| 16 -> "LOCAL_GET" | 17 -> "LOCAL_SET"
| 18 -> "UPVALUE_GET" | 19 -> "UPVALUE_SET"
| 20 -> "GLOBAL_GET" | 21 -> "GLOBAL_SET"
| 32 -> "JUMP" | 33 -> "JUMP_IF_FALSE" | 34 -> "JUMP_IF_TRUE"
| 48 -> "CALL" | 49 -> "TAIL_CALL" | 50 -> "RETURN"
| 51 -> "CLOSURE" | 52 -> "CALL_PRIM"
| 64 -> "LIST" | 65 -> "DICT"
| 128 -> "DEFINE"
| 144 -> "STR_CONCAT"
| 160 -> "ADD" | 161 -> "SUB" | 162 -> "MUL" | 163 -> "DIV"
| 164 -> "EQ" | 165 -> "LT" | 166 -> "GT" | 167 -> "NOT"
| 168 -> "LEN" | 169 -> "FIRST" | 170 -> "REST" | 171 -> "NTH"
| 172 -> "CONS" | 173 -> "NEG" | 174 -> "INC" | 175 -> "DEC"
| n -> Printf.sprintf "UNKNOWN_%d" n
(** Number of extra operand bytes consumed by each opcode.
Returns (format, total_bytes) where format describes the operand types. *)
let opcode_operand_size = function
| 1 (* CONST *) | 20 (* GLOBAL_GET *) | 21 (* GLOBAL_SET *)
| 64 (* LIST *) | 65 (* DICT *) | 128 (* DEFINE *) -> 2 (* u16 *)
| 16 (* LOCAL_GET *) | 17 (* LOCAL_SET *)
| 18 (* UPVALUE_GET *) | 19 (* UPVALUE_SET *)
| 48 (* CALL *) | 49 (* TAIL_CALL *)
| 144 (* STR_CONCAT *) -> 1 (* u8 *)
| 32 (* JUMP *) | 33 (* JUMP_IF_FALSE *) | 34 (* JUMP_IF_TRUE *) -> 2 (* i16 *)
| 51 (* CLOSURE *) -> 2 (* u16 for constant index; upvalue descriptors follow dynamically *)
| 52 (* CALL_PRIM *) -> 3 (* u16 + u8 *)
| _ -> 0 (* no operand *)
(** Trace a single execution — compile + run, collecting trace entries.
Each entry is a dict with :opcode, :stack, :depth. *)
let trace_run src globals =
(* Compile *)
let compile_fn = try Hashtbl.find globals "compile"
with Not_found -> raise (Eval_error "trace: compiler not loaded") in
let exprs = Sx_parser.parse_all src in
let expr = match exprs with [e] -> e | _ -> List (Symbol "do" :: exprs) in
let quoted = List [Symbol "quote"; expr] in
let code_val = Sx_ref.eval_expr (List [compile_fn; quoted]) (Env (make_env ())) in
let code = code_from_value code_val in
let cl = { vm_code = code; vm_upvalues = [||]; vm_name = Some "trace";
vm_env_ref = globals; vm_closure_env = None } in
let vm = create globals in
let frame0 = { closure = cl; ip = 0; base = 0; local_cells = Hashtbl.create 4 } in
for _ = 0 to code.vc_locals - 1 do push vm Nil done;
vm.frames <- [frame0];
(* Run with tracing *)
let trace = ref [] in
let max_steps = 10000 in
let steps = ref 0 in
(try
while vm.frames <> [] && !steps < max_steps do
match vm.frames with
| [] -> ()
| frame :: _ ->
let bc = frame.closure.vm_code.vc_bytecode in
if frame.ip >= Array.length bc then
vm.frames <- []
else begin
let op = bc.(frame.ip) in
(* Snapshot stack top 5 *)
let stack_snap = List.init (min 5 vm.sp) (fun i ->
let v = vm.stack.(vm.sp - 1 - i) in
String (Sx_types.inspect v)) in
let entry = Hashtbl.create 4 in
Hashtbl.replace entry "opcode" (String (opcode_name op));
Hashtbl.replace entry "stack" (List stack_snap);
Hashtbl.replace entry "depth" (Number (float_of_int (List.length vm.frames)));
trace := Dict entry :: !trace;
incr steps;
(* Execute one step — use the main run loop for 1 step.
We do this by saving the state and running the original dispatch. *)
let saved_ip = frame.ip in
frame.ip <- frame.ip + 1;
let rest_frames = List.tl vm.frames in
(try match op with
| 1 -> let idx = read_u16 frame in push vm frame.closure.vm_code.vc_constants.(idx)
| 2 -> push vm Nil
| 3 -> push vm (Bool true)
| 4 -> push vm (Bool false)
| 5 -> ignore (pop vm)
| 6 -> push vm (peek vm)
| 16 -> let slot = read_u8 frame in
let v = match Hashtbl.find_opt frame.local_cells slot with
| Some cell -> cell.uv_value
| None -> vm.stack.(frame.base + slot) in
push vm v
| 17 -> let slot = read_u8 frame in let v = peek vm in
(match Hashtbl.find_opt frame.local_cells slot with
| Some cell -> cell.uv_value <- v
| None -> vm.stack.(frame.base + slot) <- v)
| 18 -> let idx = read_u8 frame in
push vm frame.closure.vm_upvalues.(idx).uv_value
| 19 -> let idx = read_u8 frame in
frame.closure.vm_upvalues.(idx).uv_value <- peek vm
| 20 -> let idx = read_u16 frame in
let name = match frame.closure.vm_code.vc_constants.(idx) with String s -> s | _ -> "" in
let v = try Hashtbl.find vm.globals name with Not_found ->
try Sx_primitives.get_primitive name with _ ->
raise (Eval_error ("VM undefined: " ^ name)) in
push vm v
| 21 -> let idx = read_u16 frame in
let name = match frame.closure.vm_code.vc_constants.(idx) with String s -> s | _ -> "" in
Hashtbl.replace vm.globals name (peek vm)
| 32 -> let offset = read_i16 frame in frame.ip <- frame.ip + offset
| 33 -> let offset = read_i16 frame in let v = pop vm in
if not (sx_truthy v) then frame.ip <- frame.ip + offset
| 34 -> let offset = read_i16 frame in let v = pop vm in
if sx_truthy v then frame.ip <- frame.ip + offset
| 48 -> let argc = read_u8 frame in
let args = Array.init argc (fun _ -> pop vm) in
let f = pop vm in
vm_call vm f (List.rev (Array.to_list args))
| 49 -> let argc = read_u8 frame in
let args = Array.init argc (fun _ -> pop vm) in
let f = pop vm in
vm.frames <- rest_frames; vm.sp <- frame.base;
vm_call vm f (List.rev (Array.to_list args))
| 50 -> let result = pop vm in
vm.frames <- rest_frames; vm.sp <- frame.base; push vm result
| 51 -> (* CLOSURE — skip for trace, just advance past upvalue descriptors *)
let idx = read_u16 frame in
let code_val2 = frame.closure.vm_code.vc_constants.(idx) in
let uv_count = match code_val2 with
| Dict d -> (match Hashtbl.find_opt d "upvalue-count" with
| Some (Number n) -> int_of_float n | _ -> 0)
| _ -> 0 in
let upvalues = Array.init uv_count (fun _ ->
let is_local = read_u8 frame in
let index = read_u8 frame in
if is_local = 1 then begin
let cell = match Hashtbl.find_opt frame.local_cells index with
| Some existing -> existing
| None ->
let c = { uv_value = vm.stack.(frame.base + index) } in
Hashtbl.replace frame.local_cells index c; c in
cell
end else frame.closure.vm_upvalues.(index)
) in
let inner_code = code_from_value code_val2 in
let c = { vm_code = inner_code; vm_upvalues = upvalues; vm_name = None;
vm_env_ref = vm.globals; vm_closure_env = None } in
push vm (VmClosure c)
| 52 -> let idx = read_u16 frame in let argc = read_u8 frame in
let name = match frame.closure.vm_code.vc_constants.(idx) with String s -> s | _ -> "" in
let args = List.init argc (fun _ -> pop vm) |> List.rev in
let fn_val = try Sx_primitives.get_primitive name with _ ->
try Hashtbl.find vm.globals name with Not_found ->
raise (Eval_error ("VM: unknown primitive " ^ name)) in
(match fn_val with NativeFn (_, fn) -> push vm (fn args) | _ -> push vm Nil)
| 64 -> let count = read_u16 frame in
let items = List.init count (fun _ -> pop vm) |> List.rev in
push vm (List items)
| 65 -> let count = read_u16 frame in
let d = Hashtbl.create count in
for _ = 1 to count do let v = pop vm in let k = pop vm in
let key = match k with String s -> s | Keyword s -> s | _ -> Sx_runtime.value_to_str k in
Hashtbl.replace d key v done;
push vm (Dict d)
| 128 -> let idx = read_u16 frame in
let name = match frame.closure.vm_code.vc_constants.(idx) with String s -> s | _ -> "" in
Hashtbl.replace vm.globals name (peek vm)
| 144 -> let count = read_u8 frame in
let parts = List.init count (fun _ -> pop vm) |> List.rev in
push vm (String (String.concat "" (List.map Sx_runtime.value_to_str parts)))
| 160 -> let b = pop vm and a = pop vm in
push vm (match a, b with Number x, Number y -> Number (x +. y) | _ -> Nil)
| 161 -> let b = pop vm and a = pop vm in
push vm (match a, b with Number x, Number y -> Number (x -. y) | _ -> Nil)
| 162 -> let b = pop vm and a = pop vm in
push vm (match a, b with Number x, Number y -> Number (x *. y) | _ -> Nil)
| 163 -> let b = pop vm and a = pop vm in
push vm (match a, b with Number x, Number y -> Number (x /. y) | _ -> Nil)
| 164 -> let b = pop vm and a = pop vm in push vm (Bool (a = b))
| 165 -> let b = pop vm and a = pop vm in
push vm (match a, b with Number x, Number y -> Bool (x < y) | _ -> Bool false)
| 166 -> let b = pop vm and a = pop vm in
push vm (match a, b with Number x, Number y -> Bool (x > y) | _ -> Bool false)
| 167 -> let v = pop vm in push vm (Bool (not (sx_truthy v)))
| 168 -> let v = pop vm in
push vm (match v with
| List l | ListRef { contents = l } -> Number (float_of_int (List.length l))
| String s -> Number (float_of_int (String.length s))
| _ -> Number 0.0)
| 169 -> let v = pop vm in
push vm (match v with List (x :: _) | ListRef { contents = x :: _ } -> x | _ -> Nil)
| 170 -> let v = pop vm in
push vm (match v with
| List (_ :: xs) | ListRef { contents = _ :: xs } -> List xs | _ -> List [])
| 171 -> let n = pop vm and coll = pop vm in
push vm (match coll, n with
| (List l | ListRef { contents = l }), Number f ->
(try List.nth l (int_of_float f) with _ -> Nil) | _ -> Nil)
| 172 -> let coll = pop vm and x = pop vm in
push vm (match coll with List l -> List (x :: l) | _ -> List [x])
| 173 -> let v = pop vm in
push vm (match v with Number x -> Number (-.x) | _ -> Nil)
| 174 -> let v = pop vm in
push vm (match v with Number x -> Number (x +. 1.0) | _ -> Nil)
| 175 -> let v = pop vm in
push vm (match v with Number x -> Number (x -. 1.0) | _ -> Nil)
| _ -> ()
with e ->
let _ = e in
ignore saved_ip;
(* On error during trace, just stop *)
vm.frames <- [])
end
done
with _ -> ());
List (List.rev !trace)
(** Disassemble a vm_code into a list of instruction dicts. *)
let disassemble (code : vm_code) =
let bc = code.vc_bytecode in
let len = Array.length bc in
let consts = code.vc_constants in
let instrs = ref [] in
let ip = ref 0 in
while !ip < len do
let offset = !ip in
let op = bc.(!ip) in
ip := !ip + 1;
let name = opcode_name op in
let operands = ref [] in
(match op with
| 1 (* CONST *) | 20 (* GLOBAL_GET *) | 21 (* GLOBAL_SET *)
| 128 (* DEFINE *) ->
if !ip + 1 < len then begin
let lo = bc.(!ip) in let hi = bc.(!ip + 1) in
let idx = lo lor (hi lsl 8) in
ip := !ip + 2;
let const_str = if idx < Array.length consts
then Sx_types.inspect consts.(idx) else "?" in
operands := [Number (float_of_int idx); String const_str]
end
| 64 (* LIST *) | 65 (* DICT *) | 51 (* CLOSURE *) ->
if !ip + 1 < len then begin
let lo = bc.(!ip) in let hi = bc.(!ip + 1) in
let idx = lo lor (hi lsl 8) in
ip := !ip + 2;
operands := [Number (float_of_int idx)];
(* For CLOSURE, skip upvalue descriptors *)
if op = 51 && idx < Array.length consts then begin
let uv_count = match consts.(idx) with
| Dict d -> (match Hashtbl.find_opt d "upvalue-count" with
| Some (Number n) -> int_of_float n | _ -> 0)
| _ -> 0 in
ip := !ip + uv_count * 2
end
end
| 16 (* LOCAL_GET *) | 17 (* LOCAL_SET *)
| 18 (* UPVALUE_GET *) | 19 (* UPVALUE_SET *)
| 48 (* CALL *) | 49 (* TAIL_CALL *)
| 144 (* STR_CONCAT *) ->
if !ip < len then begin
let v = bc.(!ip) in ip := !ip + 1;
operands := [Number (float_of_int v)]
end
| 32 (* JUMP *) | 33 (* JUMP_IF_FALSE *) | 34 (* JUMP_IF_TRUE *) ->
if !ip + 1 < len then begin
let lo = bc.(!ip) in let hi = bc.(!ip + 1) in
let raw = lo lor (hi lsl 8) in
let signed = if raw >= 32768 then raw - 65536 else raw in
ip := !ip + 2;
operands := [Number (float_of_int signed)]
end
| 52 (* CALL_PRIM *) ->
if !ip + 2 < len then begin
let lo = bc.(!ip) in let hi = bc.(!ip + 1) in
let idx = lo lor (hi lsl 8) in
let argc = bc.(!ip + 2) in
ip := !ip + 3;
let prim_name = if idx < Array.length consts
then (match consts.(idx) with String s -> s | _ -> "?") else "?" in
operands := [Number (float_of_int idx); String prim_name; Number (float_of_int argc)]
end
| _ -> ());
let entry = Hashtbl.create 4 in
Hashtbl.replace entry "offset" (Number (float_of_int offset));
Hashtbl.replace entry "opcode" (String name);
Hashtbl.replace entry "operands" (List !operands);
instrs := Dict entry :: !instrs
done;
let result = Hashtbl.create 4 in
Hashtbl.replace result "arity" (Number (float_of_int code.vc_arity));
Hashtbl.replace result "num_locals" (Number (float_of_int code.vc_locals));
Hashtbl.replace result "constants" (List (Array.to_list (Array.map (fun v -> String (Sx_types.inspect v)) consts)));
Hashtbl.replace result "bytecode" (List (List.rev !instrs));
Dict result