Files
rose-ash/hosts/ocaml/bin/bench_vm.ml
giles 6c171d4906 sx: step 14 — inline JIT primitives (-69% fib, -62% loop, -50% sum on bench_vm)
The bytecode compiler emitted OP_CALL_PRIM (52) for every primitive call, even
for arithmetic and comparison hot-paths. The VM had specialized opcodes
(OP_ADD, OP_SUB, OP_EQ, etc.) defined but unused.

- lib/compiler.sx (compile-call): emit specialized 1-byte opcode when the
  primitive name + arity matches one of {+, -, *, /, =, <, >, cons, not, len,
  first, rest}. Falls back to CALL_PRIM otherwise. fib bytecode: 50 → 38 bytes.
- hosts/ocaml/lib/sx_compiler.ml: mirror change in the auto-generated OCaml
  compiler so SXBC export from mcp_tree uses the same emission.
- hosts/ocaml/lib/sx_vm.ml: extend OP_ADD/SUB/MUL/DIV to handle Integer+Integer
  (not just Number+Number). Inline OP_EQ via Sx_runtime._fast_eq. Inline
  OP_LT/GT mixed-numeric comparisons. Avoids Hashtbl lookup on the fallback
  path for the common integer cases that dominate tight loops.
- hosts/ocaml/bin/bench_vm.ml: VM-only benchmark — loads compiler.sx via CEK,
  JIT-compiles each fn, measures Sx_vm.call_closure throughput.

Median improvements (best of 3 runs of 9-min, bench_vm.exe):
  fib(22)         107.87ms →  33.13ms   -69%
  loop(200000)    429.64ms → 161.16ms   -62%
  sum-to(50000)    72.85ms →  36.74ms   -50%
  count-lt(20000)  28.44ms →  17.58ms   -38%
  count-eq(20000)  37.23ms →  15.46ms   -58%

Tests: 4550/4550 OCaml passing (unchanged). Zero regressions.

Last step in the sx-improvements roadmap — all 14 steps complete.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-07 02:38:47 +00:00

156 lines
6.1 KiB
OCaml

(** VM bytecode benchmark — measures throughput of the VM (compiled bytecode).
Loads the SX compiler via CEK, then for each test:
1. Define the function via CEK (as a Lambda).
2. Trigger JIT compilation via Sx_vm.jit_compile_lambda.
3. Call the compiled VmClosure repeatedly via Sx_vm.call_closure.
This measures pure VM execution time on the JIT path. *)
open Sx_types
let load_compiler env globals =
let compiler_path =
if Sys.file_exists "lib/compiler.sx" then "lib/compiler.sx"
else if Sys.file_exists "../../lib/compiler.sx" then "../../lib/compiler.sx"
else if Sys.file_exists "../../../lib/compiler.sx" then "../../../lib/compiler.sx"
else failwith "compiler.sx not found"
in
let ic = open_in compiler_path in
let src = really_input_string ic (in_channel_length ic) in
close_in ic;
let exprs = Sx_parser.parse_all src in
List.iter (fun e -> ignore (Sx_ref.eval_expr e (Env env))) exprs;
let rec sync e =
Hashtbl.iter (fun id v ->
let name = Sx_types.unintern id in
Hashtbl.replace globals name v) e.bindings;
match e.parent with Some p -> sync p | None -> ()
in
sync env
let _make_globals env =
let g = Hashtbl.create 512 in
Hashtbl.iter (fun name fn ->
Hashtbl.replace g name (NativeFn (name, fn))
) Sx_primitives.primitives;
let rec sync e =
Hashtbl.iter (fun id v ->
let name = Sx_types.unintern id in
if not (Hashtbl.mem g name) then Hashtbl.replace g name v) e.bindings;
match e.parent with Some p -> sync p | None -> ()
in
sync env;
g
let define_fn env globals name params body_src =
(* Define via CEK so we get a Lambda value with proper closure. *)
let body_expr = match Sx_parser.parse_all body_src with
| [e] -> e
| _ -> failwith "expected one body expression"
in
let param_syms = List (List.map (fun p -> Symbol p) params) in
let define_expr = List [Symbol "define"; Symbol name; List [Symbol "fn"; param_syms; body_expr]] in
ignore (Sx_ref.eval_expr define_expr (Env env));
(* Sync env to globals so JIT can resolve free vars. *)
let rec sync e =
Hashtbl.iter (fun id v ->
let n = Sx_types.unintern id in
Hashtbl.replace globals n v) e.bindings;
match e.parent with Some p -> sync p | None -> ()
in
sync env;
(* Now find the Lambda and JIT-compile it. *)
let lam_val = Hashtbl.find globals name in
match lam_val with
| Lambda l ->
(match Sx_vm.jit_compile_lambda l globals with
| Some cl ->
l.l_compiled <- Some cl;
Hashtbl.replace globals name (NativeFn (name, fun args ->
Sx_vm.call_closure cl args globals));
cl
| None ->
failwith (Printf.sprintf "JIT failed for %s" name))
| _ -> failwith (Printf.sprintf "%s is not a Lambda after define" name)
let bench_call name cl globals args iters =
let times = ref [] in
for _ = 1 to iters do
Gc.full_major ();
let t0 = Unix.gettimeofday () in
let _r = Sx_vm.call_closure cl args globals in
let t1 = Unix.gettimeofday () in
times := (t1 -. t0) :: !times
done;
let sorted = List.sort compare !times in
let median = List.nth sorted (iters / 2) in
let min_t = List.nth sorted 0 in
let max_t = List.nth sorted (iters - 1) in
Printf.printf " %-22s min=%8.2fms median=%8.2fms max=%8.2fms\n%!"
name (min_t *. 1000.0) (median *. 1000.0) (max_t *. 1000.0);
median
let () =
let iters =
if Array.length Sys.argv > 1
then int_of_string Sys.argv.(1)
else 7
in
Printf.printf "VM (bytecode/JIT) benchmark (%d runs each, taking median)\n%!" iters;
Printf.printf "========================================================\n%!";
let env = Sx_types.make_env () in
let bind n fn = ignore (Sx_types.env_bind env n (NativeFn (n, fn))) in
(* Seed env with primitives as NativeFn so CEK lookups work. *)
Hashtbl.iter (fun name fn ->
Hashtbl.replace env.bindings (Sx_types.intern name) (NativeFn (name, fn))
) Sx_primitives.primitives;
(* Helpers the SX compiler relies on but aren't kernel primitives. *)
bind "symbol-name" (fun args -> match args with
| [Symbol s] -> String s | _ -> raise (Eval_error "symbol-name"));
bind "keyword-name" (fun args -> match args with
| [Keyword k] -> String k | _ -> raise (Eval_error "keyword-name"));
bind "make-symbol" (fun args -> match args with
| [String s] -> Symbol s
| [v] -> Symbol (Sx_types.value_to_string v)
| _ -> raise (Eval_error "make-symbol"));
bind "sx-serialize" (fun args -> match args with
| [v] -> String (Sx_types.inspect v)
| _ -> raise (Eval_error "sx-serialize"));
let globals = Hashtbl.create 1024 in
Hashtbl.iter (fun name fn ->
Hashtbl.replace globals name (NativeFn (name, fn))
) Sx_primitives.primitives;
Printf.printf "Loading compiler.sx ... %!";
let t0 = Unix.gettimeofday () in
load_compiler env globals;
Printf.printf "%.0fms\n%!" ((Unix.gettimeofday () -. t0) *. 1000.0);
(* fib(22) — recursive call benchmark *)
let fib_cl = define_fn env globals "fib" ["n"]
"(if (< n 2) n (+ (fib (- n 1)) (fib (- n 2))))" in
let _ = bench_call "fib(22)" fib_cl globals [Number 22.0] iters in
(* tight loop *)
let loop_cl = define_fn env globals "loop" ["n"; "acc"]
"(if (= n 0) acc (loop (- n 1) (+ acc 1)))" in
let _ = bench_call "loop(200000)" loop_cl globals [Number 200000.0; Number 0.0] iters in
(* sum-to *)
let sum_cl = define_fn env globals "sum_to" ["n"; "acc"]
"(if (= n 0) acc (sum_to (- n 1) (+ acc n)))" in
let _ = bench_call "sum-to(50000)" sum_cl globals [Number 50000.0; Number 0.0] iters in
(* count-lt: comparison-heavy *)
let cnt_cl = define_fn env globals "count_lt" ["n"; "acc"]
"(if (= n 0) acc (count_lt (- n 1) (if (< n 10000) (+ acc 1) acc)))" in
let _ = bench_call "count-lt(20000)" cnt_cl globals [Number 20000.0; Number 0.0] iters in
(* count-eq: equality-heavy on multiples of 7 *)
let eq_cl = define_fn env globals "count_eq" ["n"; "acc"]
"(if (= n 0) acc (count_eq (- n 1) (if (= 0 (- n (* 7 (/ n 7)))) (+ acc 1) acc)))" in
let _ = bench_call "count-eq(20000)" eq_cl globals [Number 20000.0; Number 0.0] iters in
Printf.printf "\nDone.\n%!"