host: fix serving-JIT host miscompile — install IO resolver for http-listen
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 33s
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 33s
The serving-JIT perform-in-HO-callback miscompile (map/rest/drop wrong
CALL_PRIM args → blank pages, empty picker) is now fully fixed, so the host
runs 100% serving JIT with NO jit-exclude.
sx-vm-extensions 81177d0e resolves a suspended HO-callback's IO inline
(instead of unwinding the native map/filter loop and corrupting the stack),
but ONLY when a synchronous resolver is installed (!_cek_io_resolver is Some _).
The host serves via the http-listen primitive, whose handler drove durable IO
through cek_run_with_io with no resolver installed (None) — so it hit the unwinding
path the fix doesn't cover. (The vm-ext repro installed a resolver, so it
never exercised the host's real no-resolver path.)
Fix: extract cek_run_with_io's IO resolution into resolve_io_request, and have
http-listen install _cek_io_resolver := Some (fun req _ -> resolve_io_request
req) — byte-identical resolution, so the inline path resolves durable reads
exactly as the CEK loop would.
Verified: host conformance 271/271; ephemeral durable server at 100% JIT (no
exclude): zero fallbacks, real content, related posts shown, picker lists 12
candidates; live blog.rose-ash.com home/post/tags return 200 with related posts
and zero error-log lines; relate-picker Playwright 4/4 (infinite-scroll + filter
+ relate).
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -522,9 +522,59 @@ let rec load_library_file path =
|
|||||||
Printf.eprintf "[load-library] %s: %s\n%!" (Filename.basename path) msg
|
Printf.eprintf "[load-library] %s: %s\n%!" (Filename.basename path) msg
|
||||||
) exprs
|
) exprs
|
||||||
|
|
||||||
(** IO-aware CEK run — handles suspension by dispatching IO requests.
|
(* IO-aware CEK run (cek_run_with_io, below) — handles suspension by dispatching
|
||||||
Import requests are handled locally (load .sx file).
|
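IO requests. Import requests are handled locally (load .sx file); text-measure is computed locally; any other op is offered to Sx_persist_store.handle_op and falls back to io_request. *)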
|
||||||
Other IO requests are sent to the Python bridge. *)
|
(** Resolve a single IO request value to its response. Shared by
|
||||||
|
cek_run_with_io's suspension loop AND the _cek_io_resolver installed for the
|
||||||
|
http-listen serving path, so the synchronous inline-resolve path (sx_vm.ml's
|
||||||
|
HO-callback suspend fix) resolves durable reads byte-identically to the
|
||||||
|
CEK-driven path. Without an installed resolver, a `perform` inside an HO
|
||||||
|
primitive callback (map/filter/…) unwinds the native loop and corrupts the
|
||||||
|
stack — the host's map/rest/drop serving-JIT miscompile. *)
|
||||||
|
and resolve_io_request request =
|
||||||
|
let op = match Sx_runtime.get_val request (String "op") with String s -> s | _ -> "" in
|
||||||
|
(match op with
|
||||||
|
| "import" ->
|
||||||
|
(* Resolve library locally — load the .sx file *)
|
||||||
|
let lib_spec = Sx_runtime.get_val request (String "library") in
|
||||||
|
let key = Sx_ref.library_name_key lib_spec in
|
||||||
|
if Sx_types.sx_truthy (Sx_ref.library_loaded_p key) then
|
||||||
|
(* Already loaded — just resume *)
|
||||||
|
Nil
|
||||||
|
else begin
|
||||||
|
(match resolve_library_path lib_spec with
|
||||||
|
| Some path -> load_library_file path
|
||||||
|
| None ->
|
||||||
|
Printf.eprintf "[import] WARNING: no file for library %s\n%!"
|
||||||
|
(Sx_runtime.value_to_str lib_spec));
|
||||||
|
Nil
|
||||||
|
end
|
||||||
|
| "text-measure" ->
|
||||||
|
let args = let a = Sx_runtime.get_val request (String "args") in
|
||||||
|
(match a with List l -> l | _ -> [a]) in
|
||||||
|
let font = match args with String f :: _ -> f | _ -> "serif" in
|
||||||
|
let size = match args with
|
||||||
|
| [_font; Number sz; _text] -> sz
|
||||||
|
| [_font; Number sz] -> sz
|
||||||
|
| _ -> 16.0 in
|
||||||
|
let text = match args with
|
||||||
|
| [_font; _sz; String t] -> t
|
||||||
|
| _ -> "" in
|
||||||
|
let (w, h, asc, desc) = measure_text_otfm font size text in
|
||||||
|
let d = Hashtbl.create 4 in
|
||||||
|
Hashtbl.replace d "width" (Number w);
|
||||||
|
Hashtbl.replace d "height" (Number h);
|
||||||
|
Hashtbl.replace d "ascent" (Number asc);
|
||||||
|
Hashtbl.replace d "descent" (Number desc);
|
||||||
|
Dict d
|
||||||
|
| _ ->
|
||||||
|
let argsv = Sx_runtime.get_val request (String "args") in
|
||||||
|
(match Sx_persist_store.handle_op op argsv with
|
||||||
|
| Some resp -> resp
|
||||||
|
| None ->
|
||||||
|
let args = (match argsv with List l -> l | _ -> [argsv]) in
|
||||||
|
io_request op args))
|
||||||
|
|
||||||
and cek_run_with_io state =
|
and cek_run_with_io state =
|
||||||
let s = ref state in
|
let s = ref state in
|
||||||
let is_terminal s = match Sx_ref.cek_terminal_p s with Bool true -> true | _ -> false in
|
let is_terminal s = match Sx_ref.cek_terminal_p s with Bool true -> true | _ -> false in
|
||||||
@@ -535,49 +585,7 @@ and cek_run_with_io state =
|
|||||||
done;
|
done;
|
||||||
if is_suspended !s then begin
|
if is_suspended !s then begin
|
||||||
let request = Sx_runtime.get_val !s (String "request") in
|
let request = Sx_runtime.get_val !s (String "request") in
|
||||||
let op = match Sx_runtime.get_val request (String "op") with String s -> s | _ -> "" in
|
let response = resolve_io_request request in
|
||||||
let response = match op with
|
|
||||||
| "import" ->
|
|
||||||
(* Resolve library locally — load the .sx file *)
|
|
||||||
let lib_spec = Sx_runtime.get_val request (String "library") in
|
|
||||||
let key = Sx_ref.library_name_key lib_spec in
|
|
||||||
if Sx_types.sx_truthy (Sx_ref.library_loaded_p key) then
|
|
||||||
(* Already loaded — just resume *)
|
|
||||||
Nil
|
|
||||||
else begin
|
|
||||||
(match resolve_library_path lib_spec with
|
|
||||||
| Some path -> load_library_file path
|
|
||||||
| None ->
|
|
||||||
Printf.eprintf "[import] WARNING: no file for library %s\n%!"
|
|
||||||
(Sx_runtime.value_to_str lib_spec));
|
|
||||||
Nil
|
|
||||||
end
|
|
||||||
| "text-measure" ->
|
|
||||||
let args = let a = Sx_runtime.get_val request (String "args") in
|
|
||||||
(match a with List l -> l | _ -> [a]) in
|
|
||||||
let font = match args with String f :: _ -> f | _ -> "serif" in
|
|
||||||
let size = match args with
|
|
||||||
| [_font; Number sz; _text] -> sz
|
|
||||||
| [_font; Number sz] -> sz
|
|
||||||
| _ -> 16.0 in
|
|
||||||
let text = match args with
|
|
||||||
| [_font; _sz; String t] -> t
|
|
||||||
| _ -> "" in
|
|
||||||
let (w, h, asc, desc) = measure_text_otfm font size text in
|
|
||||||
let d = Hashtbl.create 4 in
|
|
||||||
Hashtbl.replace d "width" (Number w);
|
|
||||||
Hashtbl.replace d "height" (Number h);
|
|
||||||
Hashtbl.replace d "ascent" (Number asc);
|
|
||||||
Hashtbl.replace d "descent" (Number desc);
|
|
||||||
Dict d
|
|
||||||
| _ ->
|
|
||||||
let argsv = Sx_runtime.get_val request (String "args") in
|
|
||||||
(match Sx_persist_store.handle_op op argsv with
|
|
||||||
| Some resp -> resp
|
|
||||||
| None ->
|
|
||||||
let args = (match argsv with List l -> l | _ -> [argsv]) in
|
|
||||||
io_request op args)
|
|
||||||
in
|
|
||||||
s := Sx_ref.cek_resume !s response;
|
s := Sx_ref.cek_resume !s response;
|
||||||
loop ()
|
loop ()
|
||||||
end else
|
end else
|
||||||
@@ -755,6 +763,17 @@ let setup_evaluator_bridge env =
|
|||||||
Unix.bind sock
|
Unix.bind sock
|
||||||
(Unix.ADDR_INET (bind_addr, port));
|
(Unix.ADDR_INET (bind_addr, port));
|
||||||
Unix.listen sock 64;
|
Unix.listen sock 64;
|
||||||
|
(* Install the synchronous IO resolver for the serving path. Without it, a
|
||||||
|
`perform` (durable kv read) that fires inside an HO-primitive callback
|
||||||
|
(map/filter/reduce/…) during request handling suspends through the
|
||||||
|
native OCaml loop, dropping its iteration state and leaving the stack
|
||||||
|
misaligned — the serving-JIT host miscompile (map/rest/drop wrong args,
|
||||||
|
blank pages, empty picker). With a resolver installed, sx_vm.ml resolves
|
||||||
|
that callback's IO inline (byte-identically to cek_run_with_io) and the
|
||||||
|
loop is never unwound. Only set if one isn't already installed. *)
|
||||||
|
(if !Sx_types._cek_io_resolver = None then
|
||||||
|
Sx_types._cek_io_resolver :=
|
||||||
|
Some (fun request _state -> resolve_io_request request));
|
||||||
(* SX runtime is shared across threads — serialize handler calls. *)
|
(* SX runtime is shared across threads — serialize handler calls. *)
|
||||||
let mtx = Mutex.create () in
|
let mtx = Mutex.create () in
|
||||||
let reason = function
|
let reason = function
|
||||||
|
|||||||
@@ -99,19 +99,14 @@ EPOCH=1
|
|||||||
for M in "${MODULES[@]}"; do
|
for M in "${MODULES[@]}"; do
|
||||||
echo "(epoch $EPOCH)"; echo "(load \"$M\")"; EPOCH=$((EPOCH+1))
|
echo "(epoch $EPOCH)"; echo "(load \"$M\")"; EPOCH=$((EPOCH+1))
|
||||||
done
|
done
|
||||||
# Serving-mode JIT: exclude the request-path SX (host app + Dream framework). The
|
# 100% serving JIT — NO host exclude. The serving-JIT perform-in-HO-callback
|
||||||
# 100%-JIT experiment surfaced the key finding — the kernel miscompile isn't just
|
# miscompile (map/rest/drop wrong args → blank pages, empty picker) is fixed by
|
||||||
# 500s, it SILENTLY returns wrong results (the `drop` in host/blog-relate-options
|
# two composing pieces: sx-vm-extensions 81177d0e resolves a callback's IO
|
||||||
# yielded an empty candidate list → broken relate picker, NO error logged). Silent
|
# inline (instead of unwinding the native HO loop) WHEN a synchronous resolver
|
||||||
# corruption is worse than a crash, so these run on CEK (they're IO-bound — no perf
|
# is installed, and sx_server.ml's http-listen now installs that resolver (it
|
||||||
# loss) while the kernel bug is fixed upstream (sx-vm-extensions, OP_PERFORM resume
|
# mirrors cek_run_with_io exactly). So the whole request path — host app +
|
||||||
# — see plans/HANDOFF-jit-miscompile.md). The Datalog/relations JIT (the real win)
|
# Dream + Datalog — runs under JIT with no exclude. Verified: ephemeral durable
|
||||||
# stays on. Drop this exclude once the resume bug lands, then go 100% JIT again.
|
# server, 100% JIT, zero fallbacks, real content, picker lists candidates.
|
||||||
if [ "${SX_SERVING_JIT:-}" = "1" ]; then
|
|
||||||
echo "(epoch $EPOCH)"
|
|
||||||
echo "(eval \"(jit-exclude! \\\"host/*\\\" \\\"dream-*\\\" \\\"dr/*\\\")\")"
|
|
||||||
EPOCH=$((EPOCH+1))
|
|
||||||
fi
|
|
||||||
# Point the blog at the DURABLE file backend (persists under $SX_PERSIST_DIR),
|
# Point the blog at the DURABLE file backend (persists under $SX_PERSIST_DIR),
|
||||||
# then idempotently seed a welcome post (sx_content = SX element markup, the
|
# then idempotently seed a welcome post (sx_content = SX element markup, the
|
||||||
# editor's content model). Re-seeding is a no-op if the slug already exists.
|
# editor's content model). Re-seeding is a no-op if the slug already exists.
|
||||||
|
|||||||
@@ -1,5 +1,34 @@
|
|||||||
# Hand-off: serving-mode JIT miscompiles host handlers (to sx-vm-extensions)
|
# Hand-off: serving-mode JIT miscompiles host handlers (to sx-vm-extensions)
|
||||||
|
|
||||||
|
> ## ✅ RESOLVED 2026-06-28 — host now runs 100% serving JIT, no exclude.
|
||||||
|
>
|
||||||
|
> Two composing pieces fixed it:
|
||||||
|
> 1. **sx-vm-extensions `81177d0e`** (`sx_vm.ml` `call_closure_reuse`): when an
|
||||||
|
> HO-primitive callback (map/filter/reduce/…) suspends on a `perform` AND a
|
||||||
|
> synchronous resolver is installed, resolve its IO inline and run it to
|
||||||
|
> completion instead of unwinding the native loop (which dropped iteration
|
||||||
|
> state and misaligned the stack → the next `CALL_PRIM` got wrong args).
|
||||||
|
> 2. **host side (`sx_server.ml`)**: that fix only engages when
|
||||||
|
> `!_cek_io_resolver` is `Some _`. The host serves via the `http-listen` primitive,
|
||||||
|
> whose handler drove durable IO through `cek_run_with_io` with the resolver
|
||||||
|
> **unset (`None`)**, so it hit the unwinding path the fix doesn't cover (the
|
||||||
|
> vm-extensions repro `repro_jit_resume.ml` *installed* a resolver, so it never
|
||||||
|
> exercised the host's real path). Fix: extracted `cek_run_with_io`'s IO
|
||||||
|
> resolution into `resolve_io_request`, and `http-listen` now installs
|
||||||
|
> `_cek_io_resolver := Some (fun req _ -> resolve_io_request req)` — byte-
|
||||||
|
> identical resolution, so the inline-resolve path resolves durable reads
|
||||||
|
> exactly as the CEK loop would.
|
||||||
|
>
|
||||||
|
> Verified: host conformance **271/271**; ephemeral durable server at 100% JIT
|
||||||
|
> (no exclude) — zero fallbacks, real content, related posts shown, picker lists
|
||||||
|
> 12 candidates; live blog.rose-ash.com home/post/tags 200 with related posts and
|
||||||
|
> zero error-log lines; relate-picker Playwright **4/4** (infinite-scroll +
|
||||||
|
> filter + relate, the `drop` path). `serve.sh` exclude dropped.
|
||||||
|
>
|
||||||
|
> Everything below is the original hand-off, kept for the record.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
> From the **host-on-sx** loop, 2026-06-28. We enabled `SX_SERVING_JIT=1` on the
|
> From the **host-on-sx** loop, 2026-06-28. We enabled `SX_SERVING_JIT=1` on the
|
||||||
> live host (blog.rose-ash.com) — the Datalog/relations saturation JITs cleanly
|
> live host (blog.rose-ash.com) — the Datalog/relations saturation JITs cleanly
|
||||||
> and is the real win (host conformance 271/271 under JIT, 5.4× faster; live
|
> and is the real win (host conformance 271/271 under JIT, 5.4× faster; live
|
||||||
|
|||||||
Reference in New Issue
Block a user