Files
rose-ash/hosts/ocaml/lib/sx_vm.ml
giles 40d0f1a438 SX bytecode: format definition, compiler, OCaml VM (Phase 1)
Three new files forming the bytecode compilation pipeline:

spec/bytecode.sx — opcode definitions (~65 ops):
  - Stack/constant ops (CONST, NIL, TRUE, POP, DUP)
  - Lexical variable access (LOCAL_GET/SET, UPVALUE_GET/SET, GLOBAL_GET/SET)
  - Jump-based control flow (JUMP, JUMP_IF_FALSE/TRUE)
  - Function ops (CALL, TAIL_CALL, RETURN, CLOSURE, CALL_PRIM)
  - HO form ops (ITER_INIT/NEXT, MAP_OPEN/APPEND/CLOSE)
  - Scope/continuation ops (SCOPE_PUSH/POP, RESET, SHIFT)
  - Aser specialization (ASER_TAG, ASER_FRAG)

spec/compiler.sx — SX-to-bytecode compiler (SX code, portable):
  - Scope analysis: resolve variables to local/upvalue/global at compile time
  - Tail position detection for TCO
  - Code generation for: if, when, and, or, let, begin, lambda,
    define, set!, quote, function calls, primitive calls
  - Constant pool with deduplication
  - Jump patching for forward references

hosts/ocaml/lib/sx_vm.ml — bytecode interpreter (OCaml):
  - Stack-based VM with array-backed operand stack
  - Call frames with base pointer for locals
  - Direct opcode dispatch via pattern match
  - Zero allocation per step (unlike CEK machine's dict-per-step)
  - Handles: constants, variables, jumps, calls, primitives,
    collections, string concat, define

Architecture: compiler.sx is spec (SX, portable). VM is platform
(OCaml-native). Same bytecode runs on JS/WASM VMs.

Also includes: CekFrame record optimization in transpiler.sx
(29 frame types as records instead of Hashtbl).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-19 18:25:41 +00:00

236 lines
6.9 KiB
OCaml

(** SX bytecode VM — stack-based interpreter.
Executes bytecode produced by compiler.sx.
Designed for speed: array-based stack, direct dispatch,
no allocation per step (unlike the CEK machine).
This is the platform-native execution engine. The same bytecode
runs on all platforms (OCaml, JS, WASM). *)
open Sx_types
(** Bytecode instruction stream. *)
type bytecode = int array
(** Code object — compiled function body. *)
type code = {
arity : int;
locals : int;
bytecode : bytecode;
constants : value array;
}
(** Upvalue — reference to a captured variable. *)
type upvalue_ref =
| Open of int (* index into enclosing frame's locals *)
| Closed of value ref (* heap-allocated after frame returns *)
(** Closure — code + captured upvalues. *)
type closure = {
code : code;
upvalues : upvalue_ref array;
name : string option;
}
(** Call frame — one per function invocation. *)
type frame = {
closure : closure;
mutable ip : int;
base : int; (* base index in value stack *)
}
(** VM state. *)
type vm = {
mutable stack : value array;
mutable sp : int; (* stack pointer — next free slot *)
mutable frames : frame list;
globals : (string, value) Hashtbl.t;
}
(** Create a new VM. *)
let create () = {
stack = Array.make 1024 Nil;
sp = 0;
frames = [];
globals = Hashtbl.create 256;
}
(** Stack operations. *)
let push vm v =
if vm.sp >= Array.length vm.stack then begin
let new_stack = Array.make (vm.sp * 2) Nil in
Array.blit vm.stack 0 new_stack 0 vm.sp;
vm.stack <- new_stack
end;
vm.stack.(vm.sp) <- v;
vm.sp <- vm.sp + 1
let pop vm =
vm.sp <- vm.sp - 1;
vm.stack.(vm.sp)
let peek vm =
vm.stack.(vm.sp - 1)
(** Read operands from bytecode. *)
let read_u8 frame =
let v = frame.closure.code.bytecode.(frame.ip) in
frame.ip <- frame.ip + 1; v
let read_u16 frame =
let lo = frame.closure.code.bytecode.(frame.ip) in
let hi = frame.closure.code.bytecode.(frame.ip + 1) in
frame.ip <- frame.ip + 2;
lo lor (hi lsl 8)
let read_i16 frame =
let v = read_u16 frame in
if v >= 32768 then v - 65536 else v
(** Execute bytecode until OP_RETURN. *)
let rec run vm =
match vm.frames with
| [] -> failwith "VM: no frame"
| frame :: _ ->
let code = frame.closure.code in
let bc = code.bytecode in
let op = bc.(frame.ip) in
frame.ip <- frame.ip + 1;
match op with
(* ---- Stack / Constants ---- *)
| 0x01 -> (* OP_CONST *)
let idx = read_u16 frame in
push vm code.constants.(idx);
run vm
| 0x02 -> push vm Nil; run vm (* OP_NIL *)
| 0x03 -> push vm (Bool true); run vm (* OP_TRUE *)
| 0x04 -> push vm (Bool false); run vm (* OP_FALSE *)
| 0x05 -> ignore (pop vm); run vm (* OP_POP *)
| 0x06 -> push vm (peek vm); run vm (* OP_DUP *)
(* ---- Variable access ---- *)
| 0x10 -> (* OP_LOCAL_GET *)
let slot = read_u8 frame in
push vm vm.stack.(frame.base + slot);
run vm
| 0x11 -> (* OP_LOCAL_SET *)
let slot = read_u8 frame in
vm.stack.(frame.base + slot) <- peek vm;
run vm
| 0x14 -> (* OP_GLOBAL_GET *)
let idx = read_u16 frame in
let name = match code.constants.(idx) with String s -> s | _ -> "" in
let v = try Hashtbl.find vm.globals name with Not_found ->
(* Fall back to primitives *)
try Sx_primitives.get name
with _ -> raise (Eval_error ("Undefined: " ^ name))
in
push vm v; run vm
| 0x15 -> (* OP_GLOBAL_SET *)
let idx = read_u16 frame in
let name = match code.constants.(idx) with String s -> s | _ -> "" in
Hashtbl.replace vm.globals name (peek vm);
run vm
(* ---- Control flow ---- *)
| 0x20 -> (* OP_JUMP *)
let offset = read_i16 frame in
frame.ip <- frame.ip + offset;
run vm
| 0x21 -> (* OP_JUMP_IF_FALSE *)
let offset = read_i16 frame in
let v = pop vm in
if not (sx_truthy v) then frame.ip <- frame.ip + offset;
run vm
| 0x22 -> (* OP_JUMP_IF_TRUE *)
let offset = read_i16 frame in
let v = pop vm in
if sx_truthy v then frame.ip <- frame.ip + offset;
run vm
(* ---- Function calls ---- *)
| 0x30 -> (* OP_CALL *)
let argc = read_u8 frame in
let args = Array.init argc (fun _ -> pop vm) in
let f = pop vm in
call vm f (Array.to_list (Array.of_list (List.rev (Array.to_list args))));
run vm
| 0x31 -> (* OP_TAIL_CALL *)
let argc = read_u8 frame in
let args = Array.init argc (fun _ -> pop vm) in
let _f = pop vm in
(* TODO: tail call optimization — reuse frame *)
let args_list = List.rev (Array.to_list args) in
call vm _f args_list;
run vm
| 0x32 -> (* OP_RETURN *)
let result = pop vm in
vm.frames <- List.tl vm.frames;
vm.sp <- frame.base;
push vm result
(* Don't recurse — return to caller *)
| 0x34 -> (* OP_CALL_PRIM *)
let idx = read_u16 frame in
let argc = read_u8 frame in
let name = match code.constants.(idx) with String s -> s | _ -> "" in
let args = List.init argc (fun _ -> pop vm) |> List.rev in
let result = Sx_primitives.call name args in
push vm result;
run vm
(* ---- Collections ---- *)
| 0x40 -> (* OP_LIST *)
let count = read_u16 frame in
let items = List.init count (fun _ -> pop vm) |> List.rev in
push vm (List items);
run vm
| 0x41 -> (* OP_DICT *)
let count = read_u16 frame in
let d = Hashtbl.create count in
for _ = 1 to count do
let v = pop vm in
let k = pop vm in
let key = match k with String s -> s | Keyword s -> s | _ -> "" in
Hashtbl.replace d key v
done;
push vm (Dict d);
run vm
(* ---- String ops ---- *)
| 0x90 -> (* OP_STR_CONCAT *)
let count = read_u8 frame in
let parts = List.init count (fun _ -> pop vm) |> List.rev in
let s = String.concat "" (List.map value_to_str parts) in
push vm (String s);
run vm
(* ---- Define ---- *)
| 0x80 -> (* OP_DEFINE *)
let idx = read_u16 frame in
let name = match code.constants.(idx) with String s -> s | _ -> "" in
let v = peek vm in
Hashtbl.replace vm.globals name v;
run vm
| opcode ->
raise (Eval_error (Printf.sprintf "VM: unknown opcode 0x%02x at ip=%d" opcode (frame.ip - 1)))
and call vm f args =
match f with
| NativeFn (_, fn) ->
let result = fn args in
push vm result
| _ ->
raise (Eval_error ("VM: not callable: " ^ value_to_str f))
(** Execute a code object in a fresh VM. *)
let execute code globals =
let vm = create () in
(* Copy globals *)
Hashtbl.iter (fun k v -> Hashtbl.replace vm.globals k v) globals;
let closure = { code; upvalues = [||]; name = None } in
let frame = { closure; ip = 0; base = 0 } in
vm.frames <- [frame];
run vm;
pop vm