#!/bin/bash
# test-differential.sh — W14/F8: cross-host differential battery.
#
# Evaluates every expression in spec/tests/differential-probes.txt on:
#   A) the native server (sx_server.exe, epoch protocol) — its printer
#   B) the SHIPPED browser kernel (eval_wasm_probes.js, guest sx-serialize)
# and diffs the outputs. The review's original 130-probe corpus was
# ephemeral (F-8); this is the committed replacement.
#
# KNOWN_DIVERGENT is the ledger of confirmed, still-open divergences —
# keyed by the probe EXPRESSION. Red on a NEW divergence (host drift) and
# red on a HEALED one (fix landed: delete the entry, locking in parity).
#
# Method note (finding refinement, 2026-07-04): comparing raw K.eval
# JS-boundary values shows float-display divergences (0.3 vs
# 0.30000000000000004) that DISAPPEAR under guest-level (sx-serialize …) —
# the F-1 float-display class is a JS-boundary artifact, not a kernel
# serialization divergence. This battery compares guest serialization.
#
# Exit status:
#   0  every probe agrees across hosts or is ledgered as divergent
#   1  at least one red probe, or nothing could be compared
#   2  SKIP — a prerequisite (build artifact or tool) is missing
set -uo pipefail
cd "$(dirname "$0")/.." || { echo "FAIL: cannot cd to repository root" >&2; exit 1; }

SERVER=hosts/ocaml/_build/default/bin/sx_server.exe
WASM=shared/static/wasm/sx_browser.bc.wasm.js
PROBES=spec/tests/differential-probes.txt

[[ -x "$SERVER" ]] || { echo "SKIP: $SERVER not built" >&2; exit 2; }
[[ -f "$WASM" ]] || { echo "SKIP: $WASM missing" >&2; exit 2; }
# The probe corpus is a committed file: its absence is a failure, not a skip.
# Without this check the compare loop runs zero times and reports success.
[[ -f "$PROBES" ]] || { echo "FAIL: $PROBES missing" >&2; exit 1; }
for tool in python3 node timeout; do
  command -v "$tool" >/dev/null || { echo "SKIP: $tool not installed" >&2; exit 2; }
done

# --- KNOWN_DIVERGENT ledger (verified live 2026-07-04) -------------------
# F-3/K53: bare sx_server's `apply` does NOT spread its argument list —
# (apply + (list 1 2 3)) errors "Expected number, got list"; (apply str l)
# returns the serialized list as one string. The WASM kernel spreads
# correctly. The test runner masks this with its own apply (F-7 class).
declare -A KNOWN_DIVERGENT
KNOWN_DIVERGENT['(apply + (list 1 2 3))']="F-3: native apply does not spread"
KNOWN_DIVERGENT['(apply max (list 1 5 2))']="F-3: native apply does not spread"
KNOWN_DIVERGENT['(apply str (list "a" "b"))']="F-3: native apply does not spread"

# Per-host result files, one "PROBE <n> <serialized value>" line per probe.
# The EXIT trap removes them on every exit path, not just the happy one.
native=$(mktemp) || { echo "FAIL: mktemp failed" >&2; exit 1; }
trap 'rm -f -- "$native"' EXIT
wasm=$(mktemp) || { echo "FAIL: mktemp failed" >&2; exit 1; }
trap 'rm -f -- "$native" "$wasm"' EXIT

# --- Host A: native server ------------------------------------------------
# Feeds every probe to the server as an (epoch N) / (eval "...") pair and
# maps the replies back to probe numbers.
# NOTE(review): the filter below treats only a column-0 '#' as a comment,
# while the shell loop further down also drops indented '#' lines. Keep
# probe-file comments at column 0, otherwise probe indices drift apart.
python3 - "$SERVER" "$PROBES" > "$native" <<'PY'
import json, subprocess, sys

server, probefile = sys.argv[1], sys.argv[2]
with open(probefile) as fh:
    probes = [l.strip() for l in fh if l.strip() and not l.startswith('#')]

inp = []
for i, p in enumerate(probes):
    inp.append(f"(epoch {i+1})")
    inp.append(f"(eval {json.dumps(p)})")

out = subprocess.run(["timeout", "120", server],
                     input="\n".join(inp) + "\n",
                     capture_output=True, text=True).stdout

# "(ok-len N ...)" announces that the next line is the value for epoch N;
# "(error N ...)" marks epoch N as failed.
res, cur = {}, None
for l in out.splitlines():
    if l.startswith("(ok-len "):
        cur = int(l.split()[1])
        res[cur] = None
    elif l.startswith("(error "):
        idx = int(l.split()[1])
        res[idx] = "ERROR"
        cur = None
    elif cur is not None and res.get(cur) is None:
        res[cur] = l
        cur = None

for i, p in enumerate(probes):
    print(f"PROBE {i+1} {res.get(i+1, '')}")
PY
native_rc=$?
if (( native_rc != 0 )); then
  echo "WARN: native probe driver exited with status $native_rc" >&2
fi

# --- Host B: shipped browser kernel ----------------------------------------
# stderr is discarded as in the original invocation; the exit status is
# surfaced instead so that a crash or timeout is not completely silent.
timeout 300 node hosts/ocaml/browser/eval_wasm_probes.js "$PROBES" > "$wasm" 2>/dev/null
wasm_rc=$?
if (( wasm_rc != 0 )); then
  echo "WARN: wasm probe driver exited with status $wasm_rc" >&2
fi

# probe_output FILE N — print the value recorded for probe N in FILE
# (nothing if FILE has no "PROBE N " line).
probe_output() {
  sed -n "s/^PROBE $2 //p" "$1"
}

# --- Compare ----------------------------------------------------------------
declare -A seen=()   # probe expressions encountered, for the stale-ledger check
pass=0; fail=0; i=0
while IFS= read -r expr; do
  i=$((i + 1))
  seen[$expr]=1
  a=$(probe_output "$native" "$i")
  b=$(probe_output "$wasm" "$i")
  known="${KNOWN_DIVERGENT[$expr]:-}"

  if [[ -z "$a" && -z "$b" ]]; then
    # Two empty results are not parity: they mean neither host answered
    # (driver crash, timeout, missing output). Never count that as agreement.
    printf 'RED: %s — no output from either host\n' "$expr"
    fail=$((fail + 1))
  elif [[ "$a" == "$b" ]]; then
    if [[ -n "$known" ]]; then
      printf 'RED: %s — KNOWN_DIVERGENT now AGREES (%s); delete from ledger\n' "$expr" "$known"
      fail=$((fail + 1))
    else
      pass=$((pass + 1))
    fi
  elif [[ -n "$known" ]]; then
    printf 'KNOWN-DIVERGENT: %s (%s)\n' "$expr" "$known"
    pass=$((pass + 1))
  else
    printf 'RED: %s\n' "$expr"
    printf '  native: %s\n' "$a"
    printf '  wasm: %s\n' "$b"
    fail=$((fail + 1))
  fi
done < <(grep -Ev '^[[:space:]]*(#|$)' "$PROBES")

# A ledger key that matches no probe can never go red or heal; surface it.
for key in "${!KNOWN_DIVERGENT[@]}"; do
  if [[ -z "${seen[$key]:-}" ]]; then
    printf 'WARN: ledger entry matches no probe: %s\n' "$key" >&2
  fi
done

if (( i == 0 )); then
  echo "FAIL: no probes found in $PROBES" >&2
  exit 1
fi

echo
echo "differential: $i probes, $pass in agreement/ledgered, $fail red"
[[ "$fail" -eq 0 ]]