From bd108ae7dda7a81a01b8f3ea097d3da363571f3f Mon Sep 17 00:00:00 2001 From: giles Date: Tue, 30 Jun 2026 11:24:29 +0000 Subject: [PATCH] tooling: per-suite conformance filter + live-check.sh; note render-diff to vm-extensions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - conformance.sh [suite] runs ONE suite (filters the SUITES array so result-parser indices stay aligned; all MODULES still load). 'conformance.sh sxtp' = 0.3s vs ~8min. - lib/host/live-check.sh: non-browser live smoke — boot ephemeral host, login, seed a post (exercises form-ingest write), print status|content-type|body-head per path, assert reads are text/sx + no JSON leak + no 5xx. The counterpart to run-picker-check.sh. - plans/NOTE-render-diff-for-vm-ext.md: defer host_render_diff (JIT-vs-interpreter regression oracle) to the sx-vm-extensions loop — it's their fix's oracle, not a host feature; building it from loops/host would fork JIT-engine understanding. Co-Authored-By: Claude Opus 4.8 --- lib/host/conformance.sh | 33 +++++++++++-- lib/host/live-check.sh | 70 ++++++++++++++++++++++++++++ plans/NOTE-render-diff-for-vm-ext.md | 41 ++++++++++++++++ plans/host-dev-tooling.md | 14 +++++- 4 files changed, 153 insertions(+), 5 deletions(-) create mode 100755 lib/host/live-check.sh create mode 100644 plans/NOTE-render-diff-for-vm-ext.md diff --git a/lib/host/conformance.sh b/lib/host/conformance.sh index f4fbc60c..b979bde2 100755 --- a/lib/host/conformance.sh +++ b/lib/host/conformance.sh @@ -4,8 +4,11 @@ # sx_server process, then reports pass/fail per suite. Mirrors lib/dream's runner. 
# # Usage: -# bash lib/host/conformance.sh # run all suites -# bash lib/host/conformance.sh -v # verbose (list each suite) +# bash lib/host/conformance.sh # run all suites +# bash lib/host/conformance.sh sxtp # run ONLY the sxtp suite (fast — skips +# # the Datalog-heavy blog suite) +# bash lib/host/conformance.sh blog -v # one suite, verbose +# bash lib/host/conformance.sh -v # all suites, verbose set -uo pipefail cd "$(git rev-parse --show-toplevel)" @@ -19,7 +22,15 @@ if [ ! -x "$SX_SERVER" ]; then exit 1 fi -VERBOSE="${1:-}" +# Args: an optional suite NAME runs just that suite (fast); -v/--verbose is verbose per-suite. Any other argument is taken as the suite name; if several are given, the last one wins. +VERBOSE="" +SUITE_FILTER="" +for arg in "$@"; do + case "$arg" in + -v|--verbose) VERBOSE="-v" ;; + *) SUITE_FILTER="$arg" ;; + esac +done # Kernel + subsystem dependencies, then the host modules. Order matters: # stdlib/r7rs first; the Datalog engine + ACL subsystem (authorisation); the feed @@ -101,6 +112,22 @@ SUITES=( "ledger host-lg-tests-run! lib/host/tests/ledger.sx" ) +# Filter to a single suite if a name was given (filter the array itself so its +# indices stay aligned with the result-parsing loop below). All MODULES still load +# — the host modules are interdependent; only the TEST suites are narrowed. +if [ -n "$SUITE_FILTER" ]; then + _FILTERED=() + for SUITE in "${SUITES[@]}"; do + [ "$(echo "$SUITE" | awk '{print $1}')" = "$SUITE_FILTER" ] && _FILTERED+=("$SUITE") + done + if [ "${#_FILTERED[@]}" -eq 0 ]; then + echo "ERROR: no suite named '$SUITE_FILTER'. 
Valid names:" >&2 + for SUITE in "${SUITES[@]}"; do echo " $(echo "$SUITE" | awk '{print $1}')" >&2; done + exit 1 + fi + SUITES=("${_FILTERED[@]}") +fi + TMPFILE=$(mktemp); trap "rm -f $TMPFILE" EXIT EPOCH=1 emit_load () { echo "(epoch $EPOCH)"; echo "(load \"$1\")"; EPOCH=$((EPOCH+1)); } diff --git a/lib/host/live-check.sh b/lib/host/live-check.sh new file mode 100755 index 00000000..4a0b9c51 --- /dev/null +++ b/lib/host/live-check.sh @@ -0,0 +1,70 @@ +#!/usr/bin/env bash +# Non-browser live-check for the host: spins up an EPHEMERAL host server (this +# worktree's binary + lib + web, a temp persist dir), logs in, seeds one post, then +# runs a sequence of HTTP checks printing status | content-type | body-head for each. +# Catches what conformance can't — the real http-listen serving path (serving-JIT +# divergence, VmSuspended renders, content-type regressions) — without a browser and +# without touching live data. The non-Playwright counterpart to run-picker-check.sh. +# +# bash lib/host/live-check.sh # default smoke: /health /posts /feed / /live-check-post/ +# bash lib/host/live-check.sh /tags /article/ # check specific GET paths instead +# +# Asserts: /posts and /feed are text/sx (the SX-native wire) with no JSON leak, pages are non-empty, no 5xx. +# Requires the OCaml binary built (hosts/ocaml/_build/default/bin/sx_server.exe). 
+set -uo pipefail +cd "$(git rev-parse --show-toplevel)" + +PORT="${LIVE_PORT:-8914}" +ADMIN_USER="admin"; PASS="live-check-pw"; SECRET="live-check-secret" # ADMIN_USER, not USER: assigning USER would overwrite the exported login-name variable for every child process +PDIR=$(mktemp -d); JAR=$(mktemp); LOG=$(mktemp); HDR=$(mktemp) +BASE="http://127.0.0.1:$PORT" +RC=0; STARTED="" # STARTED becomes 1 once this run launches its own server; cleanup only kills when it is set + +cleanup() { # EXIT trap: stop the server this run started (looked up by its listening port), then remove the temp files + local pid="" + [ -n "$STARTED" ] && pid=$(ss -lptn "sport = :$PORT" 2>/dev/null | grep -oE 'pid=[0-9]+' | head -1 | cut -d= -f2) + [ -n "$pid" ] && kill "$pid" 2>/dev/null + rm -f "$JAR" "$LOG" "$HDR"; rm -rf "$PDIR" +} +trap cleanup EXIT +[ -n "$(ss -ltn "sport = :$PORT" 2>/dev/null | grep LISTEN)" ] && { echo "ERROR: port $PORT is already in use — refusing to log in to, seed, or kill a server this run did not start (set LIVE_PORT to a free port)" >&2; exit 1; } +STARTED=1; echo "== booting ephemeral host on :$PORT (persist=$PDIR) ==" +HOST_PORT="$PORT" SX_PERSIST_DIR="$PDIR" \ + SX_ADMIN_USER="$ADMIN_USER" SX_ADMIN_PASSWORD="$PASS" SX_SESSION_SECRET="$SECRET" \ + bash lib/host/serve.sh >"$LOG" 2>&1 & +for ((i = 1; i <= 60; i++)); do + curl -sf -o /dev/null "$BASE/health" 2>/dev/null && break + sleep 1; [ "$i" = "60" ] && { echo "server never came up:"; cat "$LOG"; exit 1; } +done +echo "== up ==" + +# Log in + seed one post (also exercises the form-ingest write path). Form fields are URL-encoded; -f turns an HTTP error status into a flagged failure. 
+curl -sf -c "$JAR" -o /dev/null -X POST "$BASE/login" --data-urlencode "username=$ADMIN_USER" --data-urlencode "password=$PASS" || { echo " !! login failed"; RC=1; } +curl -sf -b "$JAR" -o /dev/null -X POST "$BASE/new" --data-urlencode 'title=Live Check Post' \ + --data-urlencode 'sx_content=(article (h1 "Live Check Post") (p "ok"))' --data-urlencode 'status=published' || { echo " !! seed post failed"; RC=1; } + +# A GET check: prints "<path> <status> <content-type> | <first 70 chars of body>" and sets RC=1 on a 5xx, an empty body, or (for /posts and /feed) a non-text/sx type or a body containing "ok": +check() { + local path="$1" body ct code + : >"$HDR"; body=$(curl -s -b "$JAR" -D "$HDR" "$BASE$path") # truncate first so a failed request cannot reuse the previous response's headers + code=$(awk 'NR==1{print $2}' "$HDR") + ct=$(grep -i '^content-type:' "$HDR" | head -1 | tr -d '\r' | sed 's/content-type: *//I') + printf ' %-20s %s %-26s | %s\n' "$path" "${code:-???}" "${ct:-?}" "$(printf '%s' "$body" | tr '\n' ' ' | cut -c1-70)" + case "$code" in 5*) echo " !! 5xx"; RC=1 ;; esac + [ -z "$body" ] && { echo " !! empty body"; RC=1; } + # data endpoints must be SX, never JSON (here-string, not a pipe: under pipefail an early grep -q exit can SIGPIPE the writer and hide a match) + case "$path" in + /posts|/feed) echo "$ct" | grep -qi 'text/sx' || { echo " !! expected text/sx, got '$ct'"; RC=1; } + grep -q '"ok":' <<<"$body" && { echo " !! 
JSON leaked"; RC=1; } ;; + esac +} + +echo "== checks ==" +if [ "$#" -gt 0 ]; then + for p in "$@"; do check "$p"; done +else + for p in /health /posts /feed / /live-check-post/; do check "$p"; done +fi + +echo "== done (rc $RC) ==" +exit "$RC" diff --git a/plans/NOTE-render-diff-for-vm-ext.md b/plans/NOTE-render-diff-for-vm-ext.md new file mode 100644 index 00000000..dd257c05 --- /dev/null +++ b/plans/NOTE-render-diff-for-vm-ext.md @@ -0,0 +1,41 @@ +# NOTE → the `sx-vm-extensions` loop: `host_render_diff` is yours to own + +**From:** the host-on-sx loop (`loops/host`). **Date:** 2026-06-30. + +## The ask + +I proposed a tool, **`host_render_diff`** — render a route **twice**, once through the +serving JIT and once through the CEK interpreter, and **diff the HTML**. Any divergence IS a +serving-JIT miscompile, surfaced at build time instead of live. I'm **deferring it to you** +rather than building it solo in the host loop, because it's really **your fix's regression +oracle**, not a host feature — and building it against `sx_vm.ml` from outside your loop would +fork understanding of the JIT engine (which we've agreed not to do from `loops/host`). + +## Why it matters (the bug it targets) + +The host has been bitten repeatedly by the serving-JIT miscompile you own: `map`/`for-each` +over a **function-produced list** under the `http-listen` + `cek_run_with_io` serving path +processes only the first element and **silently returns wrong results** (blank pages, empty +pickers) with no error logged. Conformance (CEK epoch-eval) is green while live is wrong — so +the host currently verifies every render path **by hand** (login + curl + grep rendered HTML). +A render-diff makes that mechanical. See `plans/HANDOFF-jit-miscompile.md` and +`[[feedback_host_serving_jit_iteration]]`. + +## What it would look like + +- Input: a route (+ optional seed/auth), rendered once with `SX_SERVING_JIT=1` and once on + pure CEK. Output: a normalized-HTML diff; non-empty diff = miscompile. 
+- Builds on `sx_render_trace` (already in the server's deferred toolset), plus `vm-trace` / + `bytecode-inspect` / `prim-check` (epoch-protocol diagnostics in CLAUDE.md). +- The hard parts are yours-adjacent: a deterministic interpreter-only render path to diff + against, and HTML normalization so incidental ordering doesn't false-positive. + +## Host status (context for you) + +The host runs CEK-only in serving mode (`serve.sh` does `jit-exclude! "host/*" "dream-*" +"dr/*"` when `SX_SERVING_JIT=1`); Datalog/relations JIT stays (the win). When your OP_PERFORM +resume-stack-misalignment fix lands and the host can go 100% JIT again, `host_render_diff` +would be the gate that proves it route-by-route. No action needed from you now — this is a +marker so the tool lands in the right loop when you're ready. + +— host-on-sx diff --git a/plans/host-dev-tooling.md b/plans/host-dev-tooling.md index 960480e3..1c21c526 100644 --- a/plans/host-dev-tooling.md +++ b/plans/host-dev-tooling.md @@ -16,7 +16,11 @@ eyeballed live** (login + curl + grep the rendered HTML). The tools below make t eventually, automatic. See `[[feedback_host_serving_jit_iteration]]`, `[[project_sx_engine_harness_tests]]`. -## 1. `host_conformance(suite?)` — per-suite, fast (trivial; do first) +## 1. `host_conformance(suite?)` — per-suite, fast (trivial; do first) — DONE 2026-06-30 + +`conformance.sh [suite] [-v]` now takes an optional suite name (filters the SUITES array so +result-parser indices stay aligned; all MODULES still load). `conformance.sh sxtp` runs in +**0.3s** vs ~8min for the full Datalog-heavy run. Bad name → error listing valid suites. Today `conformance.sh` runs all 11 suites (~10 min, all-or-nothing). Iterating on one subsystem means hand-extracting the `MODULES` array to build a focused runner (done by hand this session). @@ -28,7 +32,13 @@ means hand-extracting the `MODULES` array to build a focused runner (done by han - **Effort:** ~1 line of bash + arg parse. 
**Payoff:** every remaining iteration of this loop. - **Not MCP-shaped on its own** — the bash arg is 90% of the value; wrap only if convenient. -## 2. `host_live_check(seed, requests)` — rendered HTML from an ephemeral server (high ROI) +## 2. `host_live_check` — rendered HTML from an ephemeral server (high ROI) — DONE 2026-06-30 + +Built as `lib/host/live-check.sh` (shell, the right grain — matches run-picker-check.sh). Boots +an ephemeral host, logs in, seeds a post (exercising the form-ingest write path), then prints +`status | content-type | body-head` for `/health /posts /feed / /live-check-post/` (or paths passed as +args). Asserts reads are `text/sx`, no JSON leak, no 5xx, non-empty bodies — ~10s, no browser. +Caught nothing new today (the wire was already verified) but it's the standing pre-deploy smoke. Generalize `lib/host/playwright/run-picker-check.sh` from "the picker" to "any route." Boot an ephemeral host server on a temp persist dir, seed posts, run an **authed request sequence**, and