host: fix serving-JIT host miscompile — install IO resolver for http-listen
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 33s
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 33s
The serving-JIT perform-in-HO-callback miscompile (map/rest/drop wrong
CALL_PRIM args → blank pages, empty picker) is now fully fixed, so the host
runs 100% serving JIT with NO jit-exclude.
sx-vm-extensions 81177d0e resolves a suspended HO-callback's IO inline
(instead of unwinding the native map/filter loop and corrupting the stack),
but ONLY when a synchronous resolver is installed (!_cek_io_resolver = Some _).
The host serves via the http-listen primitive, whose handler drove durable IO
through cek_run_with_io with the resolver = None — so it hit the unwinding
path the fix doesn't cover. (The vm-ext repro installed a resolver, so it
never exercised the host's real no-resolver path.)
Fix: extract cek_run_with_io's IO resolution into resolve_io_request, and have
http-listen install _cek_io_resolver := Some (fun req _ -> resolve_io_request
req) — byte-identical resolution, so the inline path resolves durable reads
exactly as the CEK loop would.
Verified: host conformance 271/271; ephemeral durable server at 100% JIT (no
exclude): zero fallbacks, real content, related posts shown, picker lists 12
candidates; live blog.rose-ash.com home/post/tags return 200 with related posts
and zero error-log lines; relate-picker Playwright 4/4 (infinite-scroll +
filter + relate).
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -522,9 +522,59 @@ let rec load_library_file path =
|
||||
Printf.eprintf "[load-library] %s: %s\n%!" (Filename.basename path) msg
|
||||
) exprs
|
||||
|
||||
(** IO-aware CEK run — handles suspension by dispatching IO requests.
|
||||
Import requests are handled locally (load .sx file).
|
||||
Other IO requests are sent to the Python bridge. *)
|
||||
(* IO-aware CEK run (cek_run_with_io, below) — handles suspension by dispatching
|
||||
IO requests. Import requests are handled locally (load .sx file). *)
|
||||
(** [resolve_io_request request] computes the response value for one suspended
    IO [request], dispatching on the request's ["op"] field (a missing or
    non-string op is treated as [""]):
    - ["import"]: loads the library's .sx file locally unless the library is
      already loaded, and responds [Nil] either way (a warning goes to stderr
      when no file can be found for the library);
    - ["text-measure"]: measures the text with [measure_text_otfm] and
      responds with a [Dict] of width / height / ascent / descent;
    - any other op: offered to [Sx_persist_store.handle_op] first, and passed
      to [io_request] when the store answers [None].

    Shared by cek_run_with_io's suspension loop and by the _cek_io_resolver
    installed for the http-listen serving path, so the inline-resolve path and
    the CEK-driven path resolve requests through the same code. *)
and resolve_io_request request =
  (* An "args" payload that is not already a list counts as one argument. *)
  let as_arg_list = function List items -> items | single -> [single] in
  let field name = Sx_runtime.get_val request (String name) in
  let op = match field "op" with String name -> name | _ -> "" in
  match op with
  | "import" ->
    let spec = field "library" in
    let lib_key = Sx_ref.library_name_key spec in
    let already_loaded = Sx_types.sx_truthy (Sx_ref.library_loaded_p lib_key) in
    (* Nothing to do for a loaded library; otherwise load its file, or warn
       when no file can be resolved. The response is Nil in every case. *)
    if not already_loaded then begin
      match resolve_library_path spec with
      | Some file -> load_library_file file
      | None ->
        Printf.eprintf "[import] WARNING: no file for library %s\n%!"
          (Sx_runtime.value_to_str spec)
    end;
    Nil
  | "text-measure" ->
    let params = as_arg_list (field "args") in
    (* Positional args: font, size, text. Defaults apply when a slot is
       missing or has an unexpected shape. *)
    let font = match params with String family :: _ -> family | _ -> "serif" in
    let size = match params with
      | [_; Number pt; _] | [_; Number pt] -> pt
      | _ -> 16.0 in
    let text = match params with [_; _; String body] -> body | _ -> "" in
    let (width, height, ascent, descent) = measure_text_otfm font size text in
    let metrics = Hashtbl.create 4 in
    List.iter
      (fun (name, measured) -> Hashtbl.replace metrics name (Number measured))
      [ "width", width; "height", height; "ascent", ascent; "descent", descent ];
    Dict metrics
  | _ ->
    let payload = field "args" in
    (* The persist store gets first refusal; everything it declines goes to
       io_request. *)
    (match Sx_persist_store.handle_op op payload with
     | Some reply -> reply
     | None -> io_request op (as_arg_list payload))

|
||||
|
||||
and cek_run_with_io state =
|
||||
let s = ref state in
|
||||
let is_terminal s = match Sx_ref.cek_terminal_p s with Bool true -> true | _ -> false in
|
||||
@@ -535,49 +585,7 @@ and cek_run_with_io state =
|
||||
done;
|
||||
if is_suspended !s then begin
|
||||
let request = Sx_runtime.get_val !s (String "request") in
|
||||
let op = match Sx_runtime.get_val request (String "op") with String s -> s | _ -> "" in
|
||||
let response = match op with
|
||||
| "import" ->
|
||||
(* Resolve library locally — load the .sx file *)
|
||||
let lib_spec = Sx_runtime.get_val request (String "library") in
|
||||
let key = Sx_ref.library_name_key lib_spec in
|
||||
if Sx_types.sx_truthy (Sx_ref.library_loaded_p key) then
|
||||
(* Already loaded — just resume *)
|
||||
Nil
|
||||
else begin
|
||||
(match resolve_library_path lib_spec with
|
||||
| Some path -> load_library_file path
|
||||
| None ->
|
||||
Printf.eprintf "[import] WARNING: no file for library %s\n%!"
|
||||
(Sx_runtime.value_to_str lib_spec));
|
||||
Nil
|
||||
end
|
||||
| "text-measure" ->
|
||||
let args = let a = Sx_runtime.get_val request (String "args") in
|
||||
(match a with List l -> l | _ -> [a]) in
|
||||
let font = match args with String f :: _ -> f | _ -> "serif" in
|
||||
let size = match args with
|
||||
| [_font; Number sz; _text] -> sz
|
||||
| [_font; Number sz] -> sz
|
||||
| _ -> 16.0 in
|
||||
let text = match args with
|
||||
| [_font; _sz; String t] -> t
|
||||
| _ -> "" in
|
||||
let (w, h, asc, desc) = measure_text_otfm font size text in
|
||||
let d = Hashtbl.create 4 in
|
||||
Hashtbl.replace d "width" (Number w);
|
||||
Hashtbl.replace d "height" (Number h);
|
||||
Hashtbl.replace d "ascent" (Number asc);
|
||||
Hashtbl.replace d "descent" (Number desc);
|
||||
Dict d
|
||||
| _ ->
|
||||
let argsv = Sx_runtime.get_val request (String "args") in
|
||||
(match Sx_persist_store.handle_op op argsv with
|
||||
| Some resp -> resp
|
||||
| None ->
|
||||
let args = (match argsv with List l -> l | _ -> [argsv]) in
|
||||
io_request op args)
|
||||
in
|
||||
let response = resolve_io_request request in
|
||||
s := Sx_ref.cek_resume !s response;
|
||||
loop ()
|
||||
end else
|
||||
@@ -755,6 +763,17 @@ let setup_evaluator_bridge env =
|
||||
Unix.bind sock
|
||||
(Unix.ADDR_INET (bind_addr, port));
|
||||
Unix.listen sock 64;
|
||||
(* Install the synchronous IO resolver for the serving path. Without it, a
   `perform` (durable kv read) that fires inside an HO-primitive callback
   (map/filter/reduce/…) during request handling suspends through the
   native OCaml loop, dropping its iteration state and leaving the stack
   misaligned — the serving-JIT host miscompile (map/rest/drop wrong args,
   blank pages, empty picker). With a resolver installed, sx_vm.ml resolves
   that callback's IO inline (through resolve_io_request, the same code
   cek_run_with_io uses) and the loop is never unwound.
   An already-installed resolver is left untouched. The check is a pattern
   match rather than `= None`, so no structural comparison is ever applied
   to a value that carries a closure. *)
(match !Sx_types._cek_io_resolver with
 | Some _ -> ()
 | None ->
   Sx_types._cek_io_resolver :=
     Some (fun request _state -> resolve_io_request request));
|
||||
(* SX runtime is shared across threads — serialize handler calls. *)
|
||||
let mtx = Mutex.create () in
|
||||
let reason = function
|
||||
|
||||
@@ -99,19 +99,14 @@ EPOCH=1
|
||||
for M in "${MODULES[@]}"; do
|
||||
echo "(epoch $EPOCH)"; echo "(load \"$M\")"; EPOCH=$((EPOCH+1))
|
||||
done
|
||||
# Serving-mode JIT: exclude the request-path SX (host app + Dream framework). The
|
||||
# 100%-JIT experiment surfaced the key finding — the kernel miscompile isn't just
|
||||
# 500s, it SILENTLY returns wrong results (the `drop` in host/blog-relate-options
|
||||
# yielded an empty candidate list → broken relate picker, NO error logged). Silent
|
||||
# corruption is worse than a crash, so these run on CEK (they're IO-bound — no perf
|
||||
# loss) while the kernel bug is fixed upstream (sx-vm-extensions, OP_PERFORM resume
|
||||
# — see plans/HANDOFF-jit-miscompile.md). The Datalog/relations JIT (the real win)
|
||||
# stays on. Drop this exclude once the resume bug lands, then go 100% JIT again.
|
||||
if [ "${SX_SERVING_JIT:-}" = "1" ]; then
|
||||
echo "(epoch $EPOCH)"
|
||||
echo "(eval \"(jit-exclude! \\\"host/*\\\" \\\"dream-*\\\" \\\"dr/*\\\")\")"
|
||||
EPOCH=$((EPOCH+1))
|
||||
fi
|
||||
# 100% serving JIT — NO host exclude. The serving-JIT perform-in-HO-callback
|
||||
# miscompile (map/rest/drop wrong args → blank pages, empty picker) is fixed by
|
||||
# two composing pieces: sx-vm-extensions 81177d0e resolves a callback's IO
|
||||
# inline (instead of unwinding the native HO loop) WHEN a synchronous resolver
|
||||
# is installed, and sx_server.ml's http-listen now installs that resolver (it
|
||||
# mirrors cek_run_with_io exactly). So the whole request path — host app +
|
||||
# Dream + Datalog — runs under JIT with no exclude. Verified: ephemeral durable
|
||||
# server, 100% JIT, zero fallbacks, real content, picker lists candidates.
|
||||
# Point the blog at the DURABLE file backend (persists under $SX_PERSIST_DIR),
|
||||
# then idempotently seed a welcome post (sx_content = SX element markup, the
|
||||
# editor's content model). Re-seeding is a no-op if the slug already exists.
|
||||
|
||||
@@ -1,5 +1,34 @@
|
||||
# Hand-off: serving-mode JIT miscompiles host handlers (to sx-vm-extensions)
|
||||
|
||||
> ## ✅ RESOLVED 2026-06-28 — host now runs 100% serving JIT, no exclude.
|
||||
>
|
||||
> Two composing pieces fixed it:
|
||||
> 1. **sx-vm-extensions `81177d0e`** (`sx_vm.ml` `call_closure_reuse`): when an
|
||||
> HO-primitive callback (map/filter/reduce/…) suspends on a `perform` AND a
|
||||
> synchronous resolver is installed, resolve its IO inline and run it to
|
||||
> completion instead of unwinding the native loop (which dropped iteration
|
||||
> state and misaligned the stack → the next `CALL_PRIM` got wrong args).
|
||||
> 2. **host side (`sx_server.ml`)**: that fix only engages when
|
||||
> `!_cek_io_resolver = Some`. The host serves via the `http-listen` primitive,
|
||||
> whose handler drove durable IO through `cek_run_with_io` with the resolver
|
||||
> **= None**, so it hit the unwinding path the fix doesn't cover (the
|
||||
> vm-extensions repro `repro_jit_resume.ml` *installed* a resolver, so it never
|
||||
> exercised the host's real path). Fix: extracted `cek_run_with_io`'s IO
|
||||
> resolution into `resolve_io_request`, and `http-listen` now installs
|
||||
> `_cek_io_resolver := Some (fun req _ -> resolve_io_request req)` — byte-
|
||||
> identical resolution, so the inline-resolve path resolves durable reads
|
||||
> exactly as the CEK loop would.
|
||||
>
|
||||
> Verified: host conformance **271/271**; ephemeral durable server at 100% JIT
|
||||
> (no exclude) — zero fallbacks, real content, related posts shown, picker lists
|
||||
> 12 candidates; live blog.rose-ash.com home/post/tags 200 with related posts and
|
||||
> zero error-log lines; relate-picker Playwright **4/4** (infinite-scroll +
|
||||
> filter + relate, the `drop` path). `serve.sh` exclude dropped.
|
||||
>
|
||||
> Everything below is the original hand-off, kept for the record.
|
||||
|
||||
---
|
||||
|
||||
> From the **host-on-sx** loop, 2026-06-28. We enabled `SX_SERVING_JIT=1` on the
|
||||
> live host (blog.rose-ash.com) — the Datalog/relations saturation JITs cleanly
|
||||
> and is the real win (host conformance 271/271 under JIT, 5.4× faster; live
|
||||
|
||||
Reference in New Issue
Block a user