(** SX bytecode VM — stack-based interpreter. Executes bytecode produced by compiler.sx. Designed for speed: array-based stack, direct dispatch, no allocation per step (unlike the CEK machine). This is the platform-native execution engine. The same bytecode runs on all platforms (OCaml, JS, WASM). *) open Sx_types (** Code object — compiled function body. *) type code = { arity : int; locals : int; bytecode : int array; constants : value array; } (** Closure — code + captured upvalues. *) type vm_closure = { code : code; name : string option; } (** Call frame — one per function invocation. *) type frame = { closure : vm_closure; mutable ip : int; base : int; (* base index in value stack for locals *) } (** VM state. *) type vm = { mutable stack : value array; mutable sp : int; mutable frames : frame list; globals : (string, value) Hashtbl.t; } let create globals = let g = Hashtbl.create 256 in Hashtbl.iter (fun k v -> Hashtbl.replace g k v) globals; { stack = Array.make 4096 Nil; sp = 0; frames = []; globals = g } (** Stack ops — inlined for speed. *) let push vm v = if vm.sp >= Array.length vm.stack then begin let ns = Array.make (vm.sp * 2) Nil in Array.blit vm.stack 0 ns 0 vm.sp; vm.stack <- ns end; vm.stack.(vm.sp) <- v; vm.sp <- vm.sp + 1 let[@inline] pop vm = vm.sp <- vm.sp - 1; vm.stack.(vm.sp) let[@inline] peek vm = vm.stack.(vm.sp - 1) (** Read operands. *) let[@inline] read_u8 f = let v = f.closure.code.bytecode.(f.ip) in f.ip <- f.ip + 1; v let[@inline] read_u16 f = let lo = f.closure.code.bytecode.(f.ip) in let hi = f.closure.code.bytecode.(f.ip + 1) in f.ip <- f.ip + 2; lo lor (hi lsl 8) let[@inline] read_i16 f = let v = read_u16 f in if v >= 32768 then v - 65536 else v (** Main execution loop. *) let rec run vm = match vm.frames with | [] -> failwith "VM: no frame" | frame :: rest_frames -> let bc = frame.closure.code.bytecode in let consts = frame.closure.code.constants in let op = bc.(frame.ip) in frame.ip <- frame.ip + 1; match op with (* ---- Constants ---- *) | 1 (* OP_CONST *) -> let idx = read_u16 frame in push vm consts.(idx); run vm | 2 (* OP_NIL *) -> push vm Nil; run vm | 3 (* OP_TRUE *) -> push vm (Bool true); run vm | 4 (* OP_FALSE *) -> push vm (Bool false); run vm | 5 (* OP_POP *) -> ignore (pop vm); run vm | 6 (* OP_DUP *) -> push vm (peek vm); run vm (* ---- Variable access ---- *) | 16 (* OP_LOCAL_GET *) -> let slot = read_u8 frame in push vm vm.stack.(frame.base + slot); run vm | 17 (* OP_LOCAL_SET *) -> let slot = read_u8 frame in vm.stack.(frame.base + slot) <- peek vm; run vm | 20 (* OP_GLOBAL_GET *) -> let idx = read_u16 frame in let name = match consts.(idx) with String s -> s | _ -> "" in let v = try Hashtbl.find vm.globals name with Not_found -> try Sx_primitives.get_primitive name with _ -> raise (Eval_error ("VM undefined: " ^ name)) in push vm v; run vm | 21 (* OP_GLOBAL_SET *) -> let idx = read_u16 frame in let name = match consts.(idx) with String s -> s | _ -> "" in Hashtbl.replace vm.globals name (peek vm); run vm (* ---- Control flow ---- *) | 32 (* OP_JUMP *) -> let offset = read_i16 frame in frame.ip <- frame.ip + offset; run vm | 33 (* OP_JUMP_IF_FALSE *) -> let offset = read_i16 frame in let v = pop vm in if not (sx_truthy v) then frame.ip <- frame.ip + offset; run vm | 34 (* OP_JUMP_IF_TRUE *) -> let offset = read_i16 frame in let v = pop vm in if sx_truthy v then frame.ip <- frame.ip + offset; run vm (* ---- Function calls ---- *) | 48 (* OP_CALL *) -> let argc = read_u8 frame in let args = Array.init argc (fun _ -> pop vm) in let f = pop vm in vm_call vm f (Array.to_list (Array.of_list (List.rev (Array.to_list args)))); run vm | 49 (* OP_TAIL_CALL *) -> let argc = read_u8 frame in let args = Array.init argc (fun _ -> pop vm) in let f = pop vm in (* Tail call: pop current frame, reuse stack space *) vm.frames <- rest_frames; vm.sp <- frame.base; vm_call vm f (Array.to_list (Array.of_list (List.rev (Array.to_list args)))); run vm | 50 (* OP_RETURN *) -> let result = pop vm in vm.frames <- rest_frames; vm.sp <- frame.base; push vm result (* Return to caller — don't recurse *) | 52 (* OP_CALL_PRIM *) -> let idx = read_u16 frame in let argc = read_u8 frame in let name = match consts.(idx) with String s -> s | _ -> "" in let args = List.init argc (fun _ -> pop vm) |> List.rev in let result = (match Sx_primitives.get_primitive name with NativeFn (_, fn) -> fn args | _ -> Nil) in push vm result; run vm (* ---- Collections ---- *) | 64 (* OP_LIST *) -> let count = read_u16 frame in let items = List.init count (fun _ -> pop vm) |> List.rev in push vm (List items); run vm | 65 (* OP_DICT *) -> let count = read_u16 frame in let d = Hashtbl.create count in for _ = 1 to count do let v = pop vm in let k = pop vm in let key = match k with String s -> s | Keyword s -> s | _ -> Sx_runtime.value_to_str k in Hashtbl.replace d key v done; push vm (Dict d); run vm (* ---- String ops ---- *) | 144 (* OP_STR_CONCAT *) -> let count = read_u8 frame in let parts = List.init count (fun _ -> pop vm) |> List.rev in let s = String.concat "" (List.map Sx_runtime.value_to_str parts) in push vm (String s); run vm (* ---- Define ---- *) | 128 (* OP_DEFINE *) -> let idx = read_u16 frame in let name = match consts.(idx) with String s -> s | _ -> "" in let v = peek vm in Hashtbl.replace vm.globals name v; run vm (* ---- Closure ---- *) | 51 (* OP_CLOSURE *) -> let idx = read_u16 frame in (* The constant pool entry is a code dict from the compiler *) let code_val = consts.(idx) in let code = code_from_value code_val in let cl = { code; name = None } in push vm (NativeFn ("vm-closure", fun args -> execute_closure cl args vm.globals)); run vm | opcode -> raise (Eval_error (Printf.sprintf "VM: unknown opcode %d at ip=%d" opcode (frame.ip - 1))) and vm_call vm f args = match f with | NativeFn (_, fn) -> let result = fn args in push vm result | Lambda _ -> (* Call a CEK-defined lambda through the VM *) let result = Sx_ref.cek_call f (List args) in push vm result | _ -> raise (Eval_error ("VM: not callable: " ^ Sx_runtime.value_to_str f)) (** Convert compiler output (SX dict) to a code object. *) and code_from_value v = match v with | Dict d -> let bc_list = match Hashtbl.find_opt d "bytecode" with | Some (List l | ListRef { contents = l }) -> Array.of_list (List.map (fun x -> match x with Number n -> int_of_float n | _ -> 0) l) | _ -> [||] in let entries = match Hashtbl.find_opt d "constants" with | Some (List l | ListRef { contents = l }) -> Array.of_list l | _ -> [||] in (* Recursively convert nested code objects in the pool *) let constants = Array.map (fun entry -> match entry with | Dict ed when Hashtbl.mem ed "bytecode" -> (* Nested code object — keep as Dict for lazy conversion *) entry | _ -> entry ) entries in let arity = match Hashtbl.find_opt d "arity" with | Some (Number n) -> int_of_float n | _ -> 0 in { arity; locals = arity + 16; bytecode = bc_list; constants } | _ -> { arity = 0; locals = 16; bytecode = [||]; constants = [||] } (** Execute a closure with arguments. *) and execute_closure cl args globals = let vm = create globals in let frame = { closure = cl; ip = 0; base = vm.sp } in (* Push args as locals *) List.iter (fun a -> push vm a) args; (* Pad remaining locals with nil *) for _ = List.length args to cl.code.locals - 1 do push vm Nil done; vm.frames <- [frame]; run vm; pop vm (** Execute a compiled module (top-level bytecode). *) let execute_module code globals = let cl = { code; name = Some "module" } in let vm = create globals in let frame = { closure = cl; ip = 0; base = 0 } in (* Pad locals *) for _ = 0 to code.locals - 1 do push vm Nil done; vm.frames <- [frame]; run vm; pop vm