CekState record optimization + profiling: 1.5x speedup, root cause found

Transpiler (transpiler.sx): detects CEK state dict literals (5 fields:
control/env/kont/phase/value) and emits CekState OCaml record instead
of Dict(Hashtbl). Eliminates 200K Hashtbl allocations per page.

Bootstrapper: skip stdlib.sx (functions already registered as OCaml
primitives). Only transpile evaluator.sx.

Runtime: get_val handles CekState with direct field access. type_of
returns "dict" for CekState (backward compat).

Profiling results (root cause of slowness):
  Pure eval: OCaml 1.6x FASTER than Python (expected)
  Aser: OCaml 28x SLOWER than Python (unexpected!)

Root cause: Python has a native optimized aser. OCaml runs the SX
adapter-sx.sx through the CEK machine — each aserCall is ~50 CEK
steps with closures, scope operations, string building.

Fix needed: native OCaml aser (like Python's), not SX adapter
through CEK machine.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-19 17:40:34 +00:00
parent 8707f21ca2
commit c16142d14c
6 changed files with 64 additions and 16 deletions

View File

@@ -724,7 +724,7 @@ let dispatch env cmd =
(try
ignore (env_bind env "expand-components?" (NativeFn ("expand-components?", fun _args -> Bool true)));
(* Enable batch IO mode *)
io_batch_mode := true;
io_batch_mode := true; Sx_ref._cek_steps := 0;
io_queue := [];
io_counter := 0;
let exprs = Sx_parser.parse_all src in
@@ -749,8 +749,8 @@ let dispatch env cmd =
(* Flush batched IO: send requests, receive responses, replace placeholders *)
let final = flush_batched_io result_str in
let t2 = Unix.gettimeofday () in
Printf.eprintf "[aser-slot] eval=%.1fs io_flush=%.1fs batched=%d result=%d chars\n%!"
(t1 -. t0) (t2 -. t1) n_batched (String.length final);
Printf.eprintf "[aser-slot] eval=%.1fs io_flush=%.1fs batched=%d result=%d chars cek_steps=%d\n%!"
(t1 -. t0) (t2 -. t1) n_batched (String.length final) !Sx_ref._cek_steps;
send (Printf.sprintf "(ok-raw %s)" final)
with
| Eval_error msg ->

View File

@@ -94,9 +94,10 @@ def compile_spec_to_ml(spec_dir: str | None = None) -> str:
trampoline(eval_expr(expr, env))
# Spec files to transpile (in dependency order)
# stdlib.sx functions are already registered as OCaml primitives —
# only the evaluator needs transpilation.
sx_files = [
("evaluator.sx", "evaluator (frames + eval + CEK)"),
("stdlib.sx", "stdlib (library functions from former primitives)"),
]
parts = [PREAMBLE]
@@ -111,8 +112,14 @@ def compile_spec_to_ml(spec_dir: str | None = None) -> str:
src = f.read()
defines = extract_defines(src)
# Skip defines provided by preamble or fixups
skip = {"trampoline"}
# Skip defines provided by preamble, fixups, or already-registered primitives
# Skip: preamble-provided, math primitives, and stdlib functions
# that use loop/named-let (transpiler can't handle those yet)
skip = {"trampoline", "ceil", "floor", "round", "abs", "min", "max",
"debug-log", "debug_log", "range", "chunk-every", "zip-pairs",
"string-contains?", "starts-with?", "ends-with?",
"string-replace", "trim", "split", "index-of",
"pad-left", "pad-right", "char-at", "substring"}
defines = [(n, e) for n, e in defines if n not in skip]
# Deduplicate — keep last definition for each name (CEK overrides tree-walk)

View File

@@ -23,12 +23,14 @@ let _prim_param_types_ref = ref Nil
(* === Transpiled from evaluator (frames + eval + CEK) === *)
(* make-cek-state *)
let _cek_steps = ref 0
let rec make_cek_state control env kont =
(let _d = Hashtbl.create 5 in Hashtbl.replace _d "control" control; Hashtbl.replace _d "env" env; Hashtbl.replace _d "kont" kont; Hashtbl.replace _d "phase" (String "eval"); Hashtbl.replace _d "value" Nil; Dict _d)
(incr _cek_steps; CekState { cs_control = control; cs_env = env; cs_kont = kont; cs_phase = "eval"; cs_value = Nil })
(* make-cek-value *)
and make_cek_value value env kont =
(let _d = Hashtbl.create 5 in Hashtbl.replace _d "control" Nil; Hashtbl.replace _d "env" env; Hashtbl.replace _d "kont" kont; Hashtbl.replace _d "phase" (String "continue"); Hashtbl.replace _d "value" value; Dict _d)
(incr _cek_steps; CekState { cs_control = Nil; cs_env = env; cs_kont = kont; cs_phase = "continue"; cs_value = value })
(* cek-terminal? *)
and cek_terminal_p state =
@@ -334,7 +336,7 @@ and cek_step state =
(* step-eval *)
and step_eval state =
(let expr = (cek_control (state)) in let env = (cek_env (state)) in let kont = (cek_kont (state)) in (let _match_val = (type_of (expr)) in (if _match_val = (String "number") then (make_cek_value (expr) (env) (kont)) else (if _match_val = (String "string") then (make_cek_value (expr) (env) (kont)) else (if _match_val = (String "boolean") then (make_cek_value (expr) (env) (kont)) else (if _match_val = (String "nil") then (make_cek_value (Nil) (env) (kont)) else (if _match_val = (String "symbol") then (let name = (symbol_name (expr)) in (let val' = (if sx_truthy ((env_has (env) (name))) then (env_get (env) (name)) else (if sx_truthy ((is_primitive (name))) then (get_primitive (name)) else (if sx_truthy ((prim_call "=" [name; (String "true")])) then (Bool true) else (if sx_truthy ((prim_call "=" [name; (String "false")])) then (Bool false) else (if sx_truthy ((prim_call "=" [name; (String "nil")])) then Nil else (raise (Eval_error (value_to_str (String (sx_str [(String "Undefined symbol: "); name])))))))))) in (make_cek_value (val') (env) (kont)))) else (if _match_val = (String "keyword") then (make_cek_value ((keyword_name (expr))) (env) (kont)) else (if _match_val = (String "dict") then (let ks = (prim_call "keys" [expr]) in (if sx_truthy ((empty_p (ks))) then (make_cek_value ((Dict (Hashtbl.create 0))) (env) (kont)) else (let first_key = (first (ks)) in let remaining_entries = ref ((List [])) in (let () = ignore ((List.iter (fun k -> ignore ((remaining_entries := sx_append_b !remaining_entries (List [k; (get (expr) (k))]); Nil))) (sx_to_list (rest (ks))); Nil)) in (make_cek_state ((get (expr) (first_key))) (env) ((kont_push ((make_dict_frame (!remaining_entries) ((List [(List [first_key])])) (env))) (kont)))))))) else (if _match_val = (String "list") then (if sx_truthy ((empty_p (expr))) then (make_cek_value ((List [])) (env) (kont)) else (step_eval_list (expr) (env) (kont))) else (make_cek_value (expr) (env) (kont))))))))))))
(let expr = (cek_control (state)) in let env = (cek_env (state)) in let kont = (cek_kont (state)) in (let _match_val = (type_of (expr)) in (if _match_val = (String "number") then (make_cek_value (expr) (env) (kont)) else (if _match_val = (String "string") then (make_cek_value (expr) (env) (kont)) else (if _match_val = (String "boolean") then (make_cek_value (expr) (env) (kont)) else (if _match_val = (String "nil") then (make_cek_value (Nil) (env) (kont)) else (if _match_val = (String "symbol") then (let name = (symbol_name (expr)) in (let val' = (if sx_truthy ((env_has (env) (name))) then (env_get (env) (name)) else (if sx_truthy ((is_primitive (name))) then (get_primitive (name)) else (if sx_truthy ((prim_call "=" [name; (String "true")])) then (Bool true) else (if sx_truthy ((prim_call "=" [name; (String "false")])) then (Bool false) else (if sx_truthy ((prim_call "=" [name; (String "nil")])) then Nil else (raise (Eval_error (value_to_str (String (sx_str [(String "Undefined symbol: "); name])))))))))) in (let () = ignore ((if sx_truthy ((let _and = (is_nil (val')) in if not (sx_truthy _and) then _and else (prim_call "starts-with?" [name; (String "~")]))) then (debug_log ((String "Component not found:")) (name)) else Nil)) in (make_cek_value (val') (env) (kont))))) else (if _match_val = (String "keyword") then (make_cek_value ((keyword_name (expr))) (env) (kont)) else (if _match_val = (String "dict") then (let ks = (prim_call "keys" [expr]) in (if sx_truthy ((empty_p (ks))) then (make_cek_value ((Dict (Hashtbl.create 0))) (env) (kont)) else (let first_key = (first (ks)) in let remaining_entries = ref ((List [])) in (let () = ignore ((List.iter (fun k -> ignore ((remaining_entries := sx_append_b !remaining_entries (List [k; (get (expr) (k))]); Nil))) (sx_to_list (rest (ks))); Nil)) in (make_cek_state ((get (expr) (first_key))) (env) ((kont_push ((make_dict_frame (!remaining_entries) ((List [(List [first_key])])) (env))) (kont)))))))) else (if _match_val = (String "list") then (if sx_truthy ((empty_p (expr))) then (make_cek_value ((List [])) (env) (kont)) else (step_eval_list (expr) (env) (kont))) else (make_cek_value (expr) (env) (kont))))))))))))
(* step-eval-list *)
and step_eval_list expr env kont =
@@ -422,7 +424,7 @@ and step_sf_deref args env kont =
(* cek-call *)
and cek_call f args =
(let a = (if sx_truthy ((is_nil (args))) then (List []) else args) in (if sx_truthy ((is_nil (f))) then Nil else (if sx_truthy ((is_lambda (f))) then (cek_run ((continue_with_call (f) (a) ((Dict (Hashtbl.create 0))) (a) ((List []))))) else (if sx_truthy ((is_callable (f))) then (sx_apply f a) else Nil))))
(let a = (if sx_truthy ((is_nil (args))) then (List []) else args) in (if sx_truthy ((is_nil (f))) then Nil else (if sx_truthy ((let _or = (is_lambda (f)) in if sx_truthy _or then _or else (is_callable (f)))) then (cek_run ((continue_with_call (f) (a) ((Dict (Hashtbl.create 0))) (a) ((List []))))) else Nil)))
(* reactive-shift-deref *)
and reactive_shift_deref sig' env kont =

View File

@@ -74,6 +74,11 @@ let sx_dict_set_b d k v =
(** Get from dict or list. *)
let get_val container key =
match container, key with
| CekState s, String k ->
(match k with
| "control" -> s.cs_control | "env" -> s.cs_env
| "kont" -> s.cs_kont | "phase" -> String s.cs_phase
| "value" -> s.cs_value | _ -> Nil)
| Dict d, String k -> dict_get d k
| Dict d, Keyword k -> dict_get d k
| (List l | ListRef { contents = l }), Number n ->
@@ -370,3 +375,7 @@ let strip_prefix s prefix =
then String (String.sub s pl (String.length s - pl))
else String s
| _ -> s
(* debug_log — no-op in production, used by CEK evaluator for component warnings *)
let debug_log _ _ = Nil

View File

@@ -37,6 +37,17 @@ and value =
| SxExpr of string (** Opaque SX wire-format string — aser output. *)
| Env of env (** First-class environment — used by CEK machine state dicts. *)
| ListRef of value list ref (** Mutable list — JS-style array for append! *)
| CekState of cek_state (** Optimized CEK machine state — avoids Dict allocation. *)
(** CEK machine state — record instead of Dict for performance.
5 fields × 55K steps/sec = 275K Hashtbl allocations/sec eliminated. *)
and cek_state = {
cs_control : value;
cs_env : value;
cs_kont : value;
cs_phase : string;
cs_value : value;
}
(** Mutable string-keyed table (SX dicts support [dict-set!]). *)
and dict = (string, value) Hashtbl.t
@@ -233,6 +244,7 @@ let type_of = function
| Spread _ -> "spread"
| SxExpr _ -> "sx-expr"
| Env _ -> "env"
| CekState _ -> "dict" (* CEK state behaves as a dict for type checks *)
let is_nil = function Nil -> true | _ -> false
let is_lambda = function Lambda _ -> true | _ -> false
@@ -390,3 +402,4 @@ let rec inspect = function
| Spread _ -> "<spread>"
| SxExpr s -> Printf.sprintf "<sx-expr:%d chars>" (String.length s)
| Env _ -> "<env>"
| CekState _ -> "<cek-state>"

View File

@@ -412,12 +412,29 @@
(define ml-emit-dict-native
(fn ((d :as dict) (set-vars :as list))
(let ((items (keys d)))
(str "(let _d = Hashtbl.create " (str (round (len items)))
" in " (join "; " (map (fn (k)
(str "Hashtbl.replace _d " (ml-quote-string k)
" " (ml-expr-inner (get d k) set-vars)))
items))
"; Dict _d)"))))
;; Optimize CEK state dicts — emit CekState record instead of Hashtbl.
;; Detected by having exactly {control, env, kont, phase, value} keys.
(if (and (= (len items) 5)
(some (fn (k) (= k "control")) items)
(some (fn (k) (= k "phase")) items)
(some (fn (k) (= k "kont")) items))
(str "(CekState { cs_control = " (ml-expr-inner (get d "control") set-vars)
"; cs_env = " (ml-expr-inner (get d "env") set-vars)
"; cs_kont = " (ml-expr-inner (get d "kont") set-vars)
"; cs_phase = " (let ((p (get d "phase")))
(if (= (type-of p) "string")
(ml-quote-string p)
(str "(match " (ml-expr-inner p set-vars)
" with String s -> s | _ -> \"\")")))
"; cs_value = " (ml-expr-inner (get d "value") set-vars)
" })")
;; Regular dict — Hashtbl
(str "(let _d = Hashtbl.create " (str (round (len items)))
" in " (join "; " (map (fn (k)
(str "Hashtbl.replace _d " (ml-quote-string k)
" " (ml-expr-inner (get d k) set-vars)))
items))
"; Dict _d)")))))
;; --------------------------------------------------------------------------