Files
rose-ash/hosts/ocaml/lib/sx_vm.ml
giles a8d1163aa6 SX bytecode VM executing: compile → run → correct results
End-to-end pipeline working:
  Python compiler.sx → bytecode → OCaml VM → result

Verified: (+ (* 3 4) 2) → 14 ✓
          (+ 0 1 2 ... 49) → 1225 ✓

Benchmark (500 iterations, 50 additions each):
  CEK machine: 327ms
  Bytecode VM: 145ms
  Speedup: 2.2x

VM handles: constants, local variables, global variables,
primitive calls, jumps, conditionals, closures (via NativeFn
wrapper), define, return.

Protocol: (vm-exec {:bytecode (...) :constants (...)})
  - Compiler outputs clean format (no internal index dict)
  - VM converts bytecode list to int array, constants to value array
  - Stack-based execution with direct opcode dispatch

The 2.2x speedup is for pure arithmetic. For aser (the real
target), the speedup will be larger because aser involves:
- String building (no CEK frame allocation in VM)
- Map/filter iterations (no frame-per-iteration in VM)
- Closure calls (no thunk/trampoline in VM)

Next: compile and run the aser adapter on the VM.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-19 18:52:50 +00:00

279 lines
8.5 KiB
OCaml

(** SX bytecode VM — stack-based interpreter.
Executes bytecode produced by compiler.sx.
Designed for speed: array-based stack, direct dispatch,
no allocation per step (unlike the CEK machine).
This is the platform-native execution engine. The same bytecode
runs on all platforms (OCaml, JS, WASM). *)
open Sx_types
(** Code object — compiled function body. *)
type code = {
arity : int;
locals : int;
bytecode : int array;
constants : value array;
}
(** Closure — code + captured upvalues. *)
type vm_closure = {
code : code;
name : string option;
}
(** Call frame — one per function invocation. *)
type frame = {
closure : vm_closure;
mutable ip : int;
base : int; (* base index in value stack for locals *)
}
(** VM state. *)
type vm = {
mutable stack : value array;
mutable sp : int;
mutable frames : frame list;
globals : (string, value) Hashtbl.t;
}
let create globals =
let g = Hashtbl.create 256 in
Hashtbl.iter (fun k v -> Hashtbl.replace g k v) globals;
{ stack = Array.make 4096 Nil; sp = 0; frames = []; globals = g }
(** Stack ops — inlined for speed. *)
let push vm v =
if vm.sp >= Array.length vm.stack then begin
let ns = Array.make (vm.sp * 2) Nil in
Array.blit vm.stack 0 ns 0 vm.sp;
vm.stack <- ns
end;
vm.stack.(vm.sp) <- v;
vm.sp <- vm.sp + 1
let[@inline] pop vm =
vm.sp <- vm.sp - 1;
vm.stack.(vm.sp)
let[@inline] peek vm = vm.stack.(vm.sp - 1)
(** Read operands. *)
let[@inline] read_u8 f =
let v = f.closure.code.bytecode.(f.ip) in
f.ip <- f.ip + 1; v
let[@inline] read_u16 f =
let lo = f.closure.code.bytecode.(f.ip) in
let hi = f.closure.code.bytecode.(f.ip + 1) in
f.ip <- f.ip + 2;
lo lor (hi lsl 8)
let[@inline] read_i16 f =
let v = read_u16 f in
if v >= 32768 then v - 65536 else v
(** Main execution loop. *)
let rec run vm =
match vm.frames with
| [] -> failwith "VM: no frame"
| frame :: rest_frames ->
let bc = frame.closure.code.bytecode in
let consts = frame.closure.code.constants in
let op = bc.(frame.ip) in
frame.ip <- frame.ip + 1;
match op with
(* ---- Constants ---- *)
| 1 (* OP_CONST *) ->
let idx = read_u16 frame in
push vm consts.(idx);
run vm
| 2 (* OP_NIL *) -> push vm Nil; run vm
| 3 (* OP_TRUE *) -> push vm (Bool true); run vm
| 4 (* OP_FALSE *) -> push vm (Bool false); run vm
| 5 (* OP_POP *) -> ignore (pop vm); run vm
| 6 (* OP_DUP *) -> push vm (peek vm); run vm
(* ---- Variable access ---- *)
| 16 (* OP_LOCAL_GET *) ->
let slot = read_u8 frame in
push vm vm.stack.(frame.base + slot);
run vm
| 17 (* OP_LOCAL_SET *) ->
let slot = read_u8 frame in
vm.stack.(frame.base + slot) <- peek vm;
run vm
| 20 (* OP_GLOBAL_GET *) ->
let idx = read_u16 frame in
let name = match consts.(idx) with String s -> s | _ -> "" in
let v = try Hashtbl.find vm.globals name with Not_found ->
try Sx_primitives.get_primitive name
with _ -> raise (Eval_error ("VM undefined: " ^ name))
in
push vm v; run vm
| 21 (* OP_GLOBAL_SET *) ->
let idx = read_u16 frame in
let name = match consts.(idx) with String s -> s | _ -> "" in
Hashtbl.replace vm.globals name (peek vm);
run vm
(* ---- Control flow ---- *)
| 32 (* OP_JUMP *) ->
let offset = read_i16 frame in
frame.ip <- frame.ip + offset;
run vm
| 33 (* OP_JUMP_IF_FALSE *) ->
let offset = read_i16 frame in
let v = pop vm in
if not (sx_truthy v) then frame.ip <- frame.ip + offset;
run vm
| 34 (* OP_JUMP_IF_TRUE *) ->
let offset = read_i16 frame in
let v = pop vm in
if sx_truthy v then frame.ip <- frame.ip + offset;
run vm
(* ---- Function calls ---- *)
| 48 (* OP_CALL *) ->
let argc = read_u8 frame in
let args = Array.init argc (fun _ -> pop vm) in
let f = pop vm in
vm_call vm f (Array.to_list (Array.of_list (List.rev (Array.to_list args))));
run vm
| 49 (* OP_TAIL_CALL *) ->
let argc = read_u8 frame in
let args = Array.init argc (fun _ -> pop vm) in
let f = pop vm in
(* Tail call: pop current frame, reuse stack space *)
vm.frames <- rest_frames;
vm.sp <- frame.base;
vm_call vm f (Array.to_list (Array.of_list (List.rev (Array.to_list args))));
run vm
| 50 (* OP_RETURN *) ->
let result = pop vm in
vm.frames <- rest_frames;
vm.sp <- frame.base;
push vm result
(* Return to caller — don't recurse *)
| 52 (* OP_CALL_PRIM *) ->
let idx = read_u16 frame in
let argc = read_u8 frame in
let name = match consts.(idx) with String s -> s | _ -> "" in
let args = List.init argc (fun _ -> pop vm) |> List.rev in
let result = (match Sx_primitives.get_primitive name with
NativeFn (_, fn) -> fn args | _ -> Nil) in
push vm result;
run vm
(* ---- Collections ---- *)
| 64 (* OP_LIST *) ->
let count = read_u16 frame in
let items = List.init count (fun _ -> pop vm) |> List.rev in
push vm (List items);
run vm
| 65 (* OP_DICT *) ->
let count = read_u16 frame in
let d = Hashtbl.create count in
for _ = 1 to count do
let v = pop vm in
let k = pop vm in
let key = match k with String s -> s | Keyword s -> s | _ -> Sx_runtime.value_to_str k in
Hashtbl.replace d key v
done;
push vm (Dict d);
run vm
(* ---- String ops ---- *)
| 144 (* OP_STR_CONCAT *) ->
let count = read_u8 frame in
let parts = List.init count (fun _ -> pop vm) |> List.rev in
let s = String.concat "" (List.map Sx_runtime.value_to_str parts) in
push vm (String s);
run vm
(* ---- Define ---- *)
| 128 (* OP_DEFINE *) ->
let idx = read_u16 frame in
let name = match consts.(idx) with String s -> s | _ -> "" in
let v = peek vm in
Hashtbl.replace vm.globals name v;
run vm
(* ---- Closure ---- *)
| 51 (* OP_CLOSURE *) ->
let idx = read_u16 frame in
(* The constant pool entry is a code dict from the compiler *)
let code_val = consts.(idx) in
let code = code_from_value code_val in
let cl = { code; name = None } in
push vm (NativeFn ("vm-closure", fun args ->
execute_closure cl args vm.globals));
run vm
| opcode ->
raise (Eval_error (Printf.sprintf "VM: unknown opcode %d at ip=%d"
opcode (frame.ip - 1)))
and vm_call vm f args =
match f with
| NativeFn (_, fn) ->
let result = fn args in
push vm result
| Lambda _ ->
(* Call a CEK-defined lambda through the VM *)
let result = Sx_ref.cek_call f (List args) in
push vm result
| _ ->
raise (Eval_error ("VM: not callable: " ^ Sx_runtime.value_to_str f))
(** Convert compiler output (SX dict) to a code object. *)
and code_from_value v =
match v with
| Dict d ->
let bc_list = match Hashtbl.find_opt d "bytecode" with
| Some (List l | ListRef { contents = l }) ->
Array.of_list (List.map (fun x -> match x with Number n -> int_of_float n | _ -> 0) l)
| _ -> [||]
in
let entries = match Hashtbl.find_opt d "constants" with
| Some (List l | ListRef { contents = l }) -> Array.of_list l
| _ -> [||]
in
(* Recursively convert nested code objects in the pool *)
let constants = Array.map (fun entry ->
match entry with
| Dict ed when Hashtbl.mem ed "bytecode" ->
(* Nested code object — keep as Dict for lazy conversion *)
entry
| _ -> entry
) entries in
let arity = match Hashtbl.find_opt d "arity" with
| Some (Number n) -> int_of_float n | _ -> 0
in
{ arity; locals = arity + 16; bytecode = bc_list; constants }
| _ -> { arity = 0; locals = 16; bytecode = [||]; constants = [||] }
(** Execute a closure with arguments. *)
and execute_closure cl args globals =
let vm = create globals in
let frame = { closure = cl; ip = 0; base = vm.sp } in
(* Push args as locals *)
List.iter (fun a -> push vm a) args;
(* Pad remaining locals with nil *)
for _ = List.length args to cl.code.locals - 1 do push vm Nil done;
vm.frames <- [frame];
run vm;
pop vm
(** Execute a compiled module (top-level bytecode). *)
let execute_module code globals =
let cl = { code; name = Some "module" } in
let vm = create globals in
let frame = { closure = cl; ip = 0; base = 0 } in
(* Pad locals *)
for _ = 0 to code.locals - 1 do push vm Nil done;
vm.frames <- [frame];
run vm;
pop vm