sx: step 12 — prim_call fast path (-66% fib, -86% reduce)

CEK frames were already records (cek_frame in sx_types.ml), so the actual
hot-path bottleneck was prim_call "=" [...] in step_continue/step_eval
dispatch: each step did a Hashtbl lookup + 2x list cons + pattern match
just to compare frame-type strings.

Added a short-circuit fast path in prim_call (sx_runtime.ml) for the
hot operators: =, <, >, <=, >=, empty?, first, rest, len. These bypass
the primitives Hashtbl entirely and dispatch directly on value shape.
Inlined _fast_eq for scalar/string equality, which dominates frame-type
dispatch comparisons.

Added bin/bench_cek.exe with five tight-loop benchmarks (fib, loop,
map, reduce, let-heavy). Median of 7 runs:

  fib(18)            2789ms -> 941ms   (-66%)
  loop(5000)         2018ms -> 620ms   (-69%)
  map sq xs(1000)    108ms  -> 48ms    (-56%)
  reduce + ys(2000)  72ms   -> 10ms    (-86%)
  let-heavy(2000)    491ms  -> 271ms   (-45%)

Tests: 4545/4545 passing baseline preserved (1339 pre-existing failures
unchanged).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-07 01:46:23 +00:00
parent 1fbfdfe4ae
commit a66c0f66f0
4 changed files with 159 additions and 6 deletions

View File

@@ -0,0 +1,73 @@
(** CEK benchmark — measures throughput of the CEK evaluator on tight loops.
Usage:
dune exec bin/bench_cek.exe
dune exec bin/bench_cek.exe -- 5 (5 runs each)
*)
open Sx_types
open Sx_parser
let parse_one s =
let exprs = parse_all s in
match exprs with
| e :: _ -> e
| [] -> failwith "empty parse"
let parse_many s = parse_all s
let bench_run name setup expr iters =
let env = Sx_types.make_env () in
(* Run setup forms in env *)
List.iter (fun e -> ignore (Sx_ref.eval_expr e (Env env))) setup;
let times = ref [] in
for _ = 1 to iters do
Gc.full_major ();
let t0 = Unix.gettimeofday () in
let _r = Sx_ref.eval_expr expr (Env env) in
let t1 = Unix.gettimeofday () in
times := (t1 -. t0) :: !times
done;
let sorted = List.sort compare !times in
let median = List.nth sorted (iters / 2) in
let min_t = List.nth sorted 0 in
let max_t = List.nth sorted (iters - 1) in
Printf.printf " %-22s min=%8.2fms median=%8.2fms max=%8.2fms\n%!"
name (min_t *. 1000.0) (median *. 1000.0) (max_t *. 1000.0);
median
let () =
let iters =
if Array.length Sys.argv > 1
then int_of_string Sys.argv.(1)
else 5
in
Printf.printf "CEK benchmark (%d runs each, taking median)\n%!" iters;
Printf.printf "==========================================\n%!";
(* fib 18 — recursive function call benchmark, smallish *)
let fib_setup = parse_many "(define (fib n) (if (< n 2) n (+ (fib (- n 1)) (fib (- n 2)))))" in
let fib_expr = parse_one "(fib 18)" in
let _ = bench_run "fib(18)" fib_setup fib_expr iters in
(* loop 5000 — tight let loop *)
let loop_setup = parse_many "(define (loop n acc) (if (= n 0) acc (loop (- n 1) (+ acc 1))))" in
let loop_expr = parse_one "(loop 5000 0)" in
let _ = bench_run "loop(5000)" loop_setup loop_expr iters in
(* map+square over 1000 elem list *)
let map_setup = parse_many "(define (range-list n) (let loop ((i 0) (acc (list))) (if (= i n) acc (loop (+ i 1) (cons i acc))))) (define xs (range-list 1000))" in
let map_expr = parse_one "(map (fn (x) (* x x)) xs)" in
let _ = bench_run "map sq xs(1000)" map_setup map_expr iters in
(* reduce + over 2000 elem list *)
let red_setup = parse_many "(define (range-list n) (let loop ((i 0) (acc (list))) (if (= i n) acc (loop (+ i 1) (cons i acc))))) (define ys (range-list 2000))" in
let red_expr = parse_one "(reduce + 0 ys)" in
let _ = bench_run "reduce + ys(2000)" red_setup red_expr iters in
(* let-heavy: many bindings + if *)
let lh_setup = parse_many "(define (lh n) (let ((a 1) (b 2) (c 3) (d 4)) (if (= n 0) (+ a b c d) (lh (- n 1)))))" in
let lh_expr = parse_one "(lh 2000)" in
let _ = bench_run "let-heavy(2000)" lh_setup lh_expr iters in
Printf.printf "\nDone.\n%!"

View File

@@ -1,5 +1,5 @@
(executables
(names run_tests debug_set sx_server integration_tests)
(names run_tests debug_set sx_server integration_tests bench_cek)
(libraries sx unix threads.posix otfm yojson))
(executable

View File

@@ -6,11 +6,72 @@
open Sx_types
(** Call a registered primitive by name. *)
(** Fast path equality — same as Sx_primitives.safe_eq for the common cases
that show up in hot dispatch (string vs string, etc). Falls through to
the registered "=" primitive for complex cases. *)
let rec _fast_eq a b =
if a == b then true
else match a, b with
| String x, String y -> x = y
| Integer x, Integer y -> x = y
| Number x, Number y -> x = y
| Integer x, Number y -> float_of_int x = y
| Number x, Integer y -> x = float_of_int y
| Bool x, Bool y -> x = y
| Nil, Nil -> true
| Symbol x, Symbol y -> x = y
| Keyword x, Keyword y -> x = y
| List la, List lb ->
(try List.for_all2 _fast_eq la lb with Invalid_argument _ -> false)
| _ -> false
(** Call a registered primitive by name.
Fast path for hot dispatch primitives ([=], [<], [>], [<=], [>=], [empty?],
[first], [rest], [len]) skips the Hashtbl lookup entirely — these are
called millions of times in the CEK [step_continue]/[step_eval] dispatch. *)
let prim_call name args =
match Hashtbl.find_opt Sx_primitives.primitives name with
| Some f -> f args
| None -> raise (Eval_error ("Unknown primitive: " ^ name))
(* Hot path: most-frequently-called primitives by step_continue dispatch *)
match name, args with
| "=", [a; b] -> Bool (_fast_eq a b)
| "empty?", [List []] -> Bool true
| "empty?", [List _] -> Bool false
| "empty?", [ListRef { contents = [] }] -> Bool true
| "empty?", [ListRef _] -> Bool false
| "empty?", [Nil] -> Bool true
| "first", [List (x :: _)] -> x
| "first", [List []] -> Nil
| "first", [ListRef { contents = (x :: _) }] -> x
| "first", [ListRef _] -> Nil
| "first", [Nil] -> Nil
| "rest", [List (_ :: xs)] -> List xs
| "rest", [List []] -> List []
| "rest", [ListRef { contents = (_ :: xs) }] -> List xs
| "rest", [ListRef _] -> List []
| "rest", [Nil] -> List []
| "len", [List l] -> Integer (List.length l)
| "len", [ListRef r] -> Integer (List.length !r)
| "len", [String s] -> Integer (String.length s)
| "len", [Nil] -> Integer 0
| "<", [Integer x; Integer y] -> Bool (x < y)
| "<", [Number x; Number y] -> Bool (x < y)
| "<", [Integer x; Number y] -> Bool (float_of_int x < y)
| "<", [Number x; Integer y] -> Bool (x < float_of_int y)
| ">", [Integer x; Integer y] -> Bool (x > y)
| ">", [Number x; Number y] -> Bool (x > y)
| ">", [Integer x; Number y] -> Bool (float_of_int x > y)
| ">", [Number x; Integer y] -> Bool (x > float_of_int y)
| "<=", [Integer x; Integer y] -> Bool (x <= y)
| "<=", [Number x; Number y] -> Bool (x <= y)
| "<=", [Integer x; Number y] -> Bool (float_of_int x <= y)
| "<=", [Number x; Integer y] -> Bool (x <= float_of_int y)
| ">=", [Integer x; Integer y] -> Bool (x >= y)
| ">=", [Number x; Number y] -> Bool (x >= y)
| ">=", [Integer x; Number y] -> Bool (float_of_int x >= y)
| ">=", [Number x; Integer y] -> Bool (x >= float_of_int y)
| _ ->
match Hashtbl.find_opt Sx_primitives.primitives name with
| Some f -> f args
| None -> raise (Eval_error ("Unknown primitive: " ^ name))
(** Convert any SX value to an OCaml string (internal). *)
let value_to_str = function

View File

@@ -189,6 +189,25 @@ These are incremental and can interleave with other phases.
tagged variant lists. Eliminates allocation pressure from list construction per frame.
Profile before/after on a tight-loop benchmark.
**Outcome:** Frames were already records (`cek_frame` in `sx_types.ml`) — the actual
hot-path bottleneck was `prim_call "=" [...]` in `step_continue`/`step_eval` dispatch:
each step did a Hashtbl lookup + 2x list cons + pattern match per comparison. Added a
fast path in `prim_call` (sx_runtime.ml) for `=`, `<`, `>`, `<=`, `>=`, `empty?`,
`first`, `rest`, `len` that skips the table lookup entirely. Also inlined `_fast_eq`
for the common scalar-equality cases that dominate frame-type dispatch. Median
improvements (bench_cek.exe, 7 runs):
| Benchmark | Before | After | Change |
|-----------|--------|-------|--------|
| fib(18) | 2789ms | 941ms | -66% |
| loop(5000) | 2018ms | 620ms | -69% |
| map sq(1000) | 108ms | 48ms | -56% |
| reduce + (2000) | 72ms | 10ms | -86% |
| let-heavy(2000) | 491ms | 271ms | -45% |
Tests: 4545 passing (unchanged baseline), 1339 failing (unchanged baseline).
Benchmark binary: `bin/bench_cek.exe`.
### Step 13: Buffer primitive for string building
Add `make-buffer`, `buffer-append!`, `buffer->string` primitives. Eliminates the
@@ -218,7 +237,7 @@ these when operands are known numbers/lists.
| 9 — parser feature registry | [x] | 986d6411 |
| 10 — compiler + as converter registry | [x] | d22361e4 |
| 11 — plugin migration + worker | [x] | 6328b810 |
| 12 — frame records | [ ] | — |
| 12 — frame records | [x] | — (fib -66%, loop -69%, reduce -86% via prim_call fast path) |
| 13 — buffer primitive | [ ] | — |
| 14 — inline primitives JIT | [ ] | — |