#!/bin/bash
# test-protocol-gate.sh — W14 pins for the epoch/command-channel protocol.
#
# Pins C1/C1b (review, plans/sx-review/hosts.md): a malformed or non-ASCII
# line on the top-level command channel used to raise an uncaught
# Sx_types.Parse_error and KILL the whole sx_server process (the shared
# channel used by bridges and conformance runners). Fixed in dc7aa709:
# the server now answers `(error N "Malformed command line: ...")` and
# keeps serving.
#
# Each case spawns its OWN timeout-bounded sx_server.exe subprocess —
# no shared/sibling process is ever touched. Designed to grow into the
# W14 section-E protocol fuzz suite (C3-C7).
#
# Usage: bash scripts/test-protocol-gate.sh
# Exit:  0 = all pins green
#        1 = a pin failed (fix regressed)
#        2 = suite could not run (repo root unreachable or server not built)

# No `-e` on purpose: a failing server pipeline must be captured in `rc`
# and reported as a FAIL line, not abort the whole script.
set -uo pipefail

# All paths below are relative to the repository root (parent of scripts/).
cd "$(dirname "$0")/.." || {
  echo "SKIP: cannot cd to repository root from $0" >&2
  exit 2
}

SERVER=hosts/ocaml/_build/default/bin/sx_server.exe
if [[ ! -x "$SERVER" ]]; then
  echo "SKIP: $SERVER not built (run sx_build target=ocaml first)" >&2
  exit 2
fi

# Global tallies, bumped by every case and reported at the end of the script.
pass=0
fail=0

# run_case NAME INPUT EXPECT_SENTINEL
#   Feeds INPUT (printf %b escapes are expanded) to a fresh server. Asserts:
#     1. an (error ... "Malformed command line: ...") response is emitted
#     2. the follow-up epoch still evaluates (EXPECT_SENTINEL is a whole
#        line of the output)
#     3. the process exits cleanly (no Fatal error, exit 0 on stdin EOF)
#   Globals: SERVER (read), pass / fail (incremented).
#   Outputs: one PASS line, or FAIL line(s) followed by the captured output.
run_case() {
  local name="$1" input="$2" sentinel="$3"
  local out rc
  # stderr is merged so that an OCaml "Fatal error" banner is visible to grep.
  out=$(printf '%b' "$input" | timeout 60 "$SERVER" 2>&1)
  rc=$?
  local ok=1
  if ! grep -q 'Malformed command line' <<<"$out"; then
    echo "FAIL: $name — no malformed-line error response"; ok=0
  fi
  if ! grep -q "^${sentinel}\$" <<<"$out"; then
    echo "FAIL: $name — follow-up epoch did not run (process died?)"; ok=0
  fi
  if grep -q 'Fatal error' <<<"$out"; then
    echo "FAIL: $name — Fatal error escaped to the top level"; ok=0
  fi
  if [[ $rc -ne 0 ]]; then
    echo "FAIL: $name — nonzero exit ($rc)"; ok=0
  fi
  if [[ $ok -eq 1 ]]; then
    echo "PASS: $name"
    pass=$((pass+1))
  else
    echo " --- output ---"; sed 's/^/ /' <<<"$out"; echo " --------------"
    fail=$((fail+1))
  fi
}

# C1: unterminated list on the command channel (exact review repro)
run_case "C1 unterminated list survives" \
  '(epoch 2)\n(eval "(+ 1 2"\n(epoch 3)\n(eval "99")\n' \
  '99'

# C1: plain-garbage line (second C1 repro shape)
run_case "C1 garbage line survives" \
  '(epoch 1)\nnot an s-expr ]]] {{{\n(epoch 2)\n(eval "42")\n' \
  '42'

# C1b: non-ASCII byte on the command channel (exact review repro; \xc3\xa9 = é)
run_case "C1b non-ASCII line survives" \
  '(epoch 1)\n(eval (quote caf\xc3\xa9))\n(epoch 2)\n(eval "99")\n' \
  '99'

# Control: a well-formed session still works end to end
ctrl=$(printf '(epoch 1)\n(eval "(+ 40 2)")\n' | timeout 60 "$SERVER" 2>&1)
if grep -q '^42$' <<<"$ctrl"; then
  echo "PASS: control well-formed session"
  pass=$((pass+1))
else
  echo "FAIL: control well-formed session"; sed 's/^/ /' <<<"$ctrl"
  fail=$((fail+1))
fi

# ---------------------------------------------------------------------------
# C3–C7 protocol-quirk LEDGER (hosts.md, all OPEN server-side). These pin
# CURRENT behavior, verified live 2026-07-04 — they are documentation, not
# endorsement. When a server fix lands and a pin fails, update the ledger
# to assert the corrected behavior (bidirectional, like test-env-parity.sh).
# ---------------------------------------------------------------------------

# ledger_case NAME INPUT GREP_MUST [GREP_MUST2]
#   Feeds INPUT (printf %b escapes are expanded) to a fresh server and pins
#   the CURRENT behavior of a known protocol quirk:
#     - GREP_MUST, and GREP_MUST2 when given, must each match the output
#       (grep basic regular expressions)
#     - no "Fatal error" may appear in the output
#     - the server must not hang: exit status 124 means `timeout` had to
#       kill it. Other nonzero statuses are deliberately NOT checked here.
#   Globals: SERVER (read), pass / fail (incremented).
#   Outputs: one PASS line, or FAIL line(s) followed by the captured output.
ledger_case() {
  local name="$1" input="$2" must="$3" must2="${4:-}"
  local out rc
  out=$(printf '%b' "$input" | timeout 60 "$SERVER" 2>&1)
  rc=$?
  local ok=1
  # `--` keeps a pattern that starts with '-' from being read as an option.
  grep -q -- "$must" <<<"$out" || { echo "FAIL: $name — expected: $must"; ok=0; }
  if [[ -n "$must2" ]]; then
    grep -q -- "$must2" <<<"$out" || { echo "FAIL: $name — expected: $must2"; ok=0; }
  fi
  if grep -q 'Fatal error' <<<"$out"; then
    echo "FAIL: $name — process died"; ok=0
  fi
  # The expected text can be emitted before a hang, so the greps alone would
  # still pass; a timeout kill has to be checked separately.
  if [[ $rc -eq 124 ]]; then
    echo "FAIL: $name — server hung (killed by timeout)"; ok=0
  fi
  if [[ $ok -eq 1 ]]; then
    echo "PASS: $name"
    pass=$((pass+1))
  else
    echo " --- output ---"; sed 's/^/ /' <<<"$out"; echo " --------------"
    fail=$((fail+1))
  fi
}

# C3: stray (io-response ...) is answered as Unknown command (dead guard) —
# an EXTRA response the client didn't ask for; process keeps serving.
ledger_case "C3 ledger: stray io-response gets an extra error reply" \
  '(epoch 1)\n(io-response 1 42)\n(eval "5")\n' \
  'Unknown command: (io-response 1 42)' '^5$'

# C4: malformed (epoch) doesn't update the epoch — next reply tagged with
# the OLD epoch (0 here), i.e. stale from the client's viewpoint.
ledger_case "C4 ledger: malformed epoch marker leaves epoch stale" \
  '(epoch)\n(eval "2")\n' \
  '(ok-len 0 1)' '^2$'

# C5: no monotonic-epoch enforcement — a decreasing epoch is accepted.
ledger_case "C5 ledger: decreasing epoch accepted silently" \
  '(epoch 9)\n(epoch 3)\n(eval "42")\n' \
  '(ok-len 3 2)' '^42$'

# C6: two commands on one line -> one error, NEITHER executed.
ledger_case "C6 ledger: two commands on one line both dropped" \
  '(epoch 1)\n(eval "1") (eval "2")\n(eval "3")\n' \
  'Expected single command, got 2' '^3$'

# C7: vm-trace without the compiler loaded errors opaquely.
ledger_case "C7 ledger: vm-trace sans compiler is opaque Not-callable-nil" \
  '(epoch 1)\n(vm-trace "(+ 1 2)")\n' \
  'Not callable: nil'

# ---------------------------------------------------------------------------
# Fuzz-liveness property: after 60 deterministic hostile lines (unbalanced
# parens, control chars, unicode, long lines, stray io-responses, epoch
# mutations), the server must still answer a well-formed command and exit
# cleanly. Seeded PRNG — reproducible corpus.
# ---------------------------------------------------------------------------
# PYTHONIOENCODING pins the bytes written for the non-ASCII fragment, so the
# corpus does not depend on the caller's locale. The order of the r.* calls
# must not change, or the seeded corpus changes with it.
fuzz=$(PYTHONIOENCODING=utf-8 python3 - <<'PY'
import random
r = random.Random(1404)
lines = []
frag = ['(', ')', '((', '))', '(eval', '(epoch', 'io-response', '"', '\\', 'café',
        '\x01', '\x1b[2J', ':kw', '{', '}', '(+ 1', 'nil)', '#|', '|#']
for i in range(60):
    kind = r.randrange(5)
    if kind == 0:
        lines.append(''.join(r.choice(frag) for _ in range(r.randrange(1, 8))))
    elif kind == 1:
        lines.append('(epoch ' + r.choice(['', 'foo', '-1', '999999999999999999999', ')']) + ')')
    elif kind == 2:
        lines.append('(io-response %d %s' % (r.randrange(99), r.choice([')', '', '42']) ))
    elif kind == 3:
        lines.append('x' * r.randrange(200, 2000))
    else:
        lines.append('(eval "' + r.choice(['(+ 1', '(list', '\\\\', '((((']) + '")')
print('\n'.join(lines))
PY
)
fuzz_rc=$?

if [[ $fuzz_rc -ne 0 || -z "$fuzz" ]]; then
  # Without this guard a missing or failing python3 leaves the corpus empty
  # and the liveness check below would pass having tested nothing.
  echo "FAIL: fuzz-liveness — corpus generation failed (python3 rc=$fuzz_rc)"
  fail=$((fail+1))
else
  out=$(printf '%s\n(epoch 777)\n(eval "\\"alive\\"")\n' "$fuzz" | timeout 90 "$SERVER" 2>&1)
  rc=$?
  if grep -q '^"alive"$' <<<"$out" && ! grep -q 'Fatal error' <<<"$out" && [[ $rc -eq 0 ]]; then
    echo "PASS: fuzz-liveness — server survives 60 hostile lines and still answers"
    pass=$((pass+1))
  else
    echo "FAIL: fuzz-liveness (rc=$rc)"; tail -6 <<<"$out" | sed 's/^/ /'
    fail=$((fail+1))
  fi
fi

# ---------------------------------------------------------------------------
# S4 (review, hosts.md): soft error pages must NOT be stored in the HTTP
# response cache. Pre-fix, a routing-failure page was cached as HTTP 200 and
# served byte-identically from cache to every later visitor (cold 2s → warm
# 0.0005s, ONE render line). Post-fix (dc7aa709), http_render_page returns
# (html, is_error) and cache insertion is gated on `not is_err` (the skip is
# logged as "[cache] → error page, not cached").
#
# Pin: GET the same nonexistent path twice against a fresh --http server and
# assert BOTH requests re-render (two [sx-http] render lines) plus the
# is_err gate line appearing in the log. NB: in a standalone worktree all
# docs pages render as soft error pages (no content), so a positive
# "real page IS cached" control is not assertable here.
# ---------------------------------------------------------------------------

# s4_case (no arguments)
#   Starts a private `$SERVER --http PORT` on a random port in 18000-19999,
#   waits up to ~40 s for it to answer, requests one nonexistent path twice
#   and checks the server log as described above.
#   Globals: SERVER (read), pass / fail (incremented).
#   Outputs: one PASS line, or FAIL line(s) followed by the log tail.
s4_case() {
  local port=$((18000 + RANDOM % 2000))
  local log
  log=$(mktemp) || {
    echo "FAIL: S4 — could not create a temporary log file"
    fail=$((fail+1)); return
  }
  timeout 90 "$SERVER" --http "$port" >"$log" 2>&1 &
  local srv=$!   # PID of `timeout`, which exits when the server does

  local up=0 attempt
  for ((attempt = 0; attempt < 40; attempt++)); do
    # Our own server already exited (crash, port in use): stop polling
    # instead of burning the remaining attempts.
    kill -0 "$srv" 2>/dev/null || break
    if curl -s -o /dev/null "http://localhost:$port/" 2>/dev/null; then
      # Only trust the answer if OUR server is still alive; otherwise some
      # other listener on this port replied.
      if kill -0 "$srv" 2>/dev/null; then up=1; fi
      break
    fi
    sleep 1
  done
  if [[ $up -ne 1 ]]; then
    echo "FAIL: S4 — http server did not come up on :$port"
    echo " --- log tail ---"; tail -12 "$log" | sed 's/^/ /'; echo " ---------------"
    kill "$srv" 2>/dev/null; rm -f "$log"
    fail=$((fail+1)); return
  fi

  # PID + $RANDOM make the path unique per run.
  local miss="/sx/gate-pin-missing-$$-$RANDOM"
  curl -s -o /dev/null "http://localhost:$port$miss"
  curl -s -o /dev/null "http://localhost:$port$miss"
  sleep 1   # give the server a moment to write its log lines

  local renders
  renders=$(grep -c "sx-http\] $miss " "$log")
  local ok=1
  if [[ "$renders" -ne 2 ]]; then
    echo "FAIL: S4 — expected 2 renders of $miss (not cache-served), got $renders"
    ok=0
  fi
  if ! grep -q 'error page, not cached' "$log"; then
    echo "FAIL: S4 — is_err cache gate line absent from server log"
    ok=0
  fi
  if [[ $ok -eq 1 ]]; then
    echo "PASS: S4 soft error page not cached (both GETs re-rendered)"
    pass=$((pass+1))
  else
    echo " --- log tail ---"; tail -12 "$log" | sed 's/^/ /'; echo " ---------------"
    fail=$((fail+1))
  fi
  kill "$srv" 2>/dev/null
  rm -f "$log"
}
s4_case

echo
echo "protocol-gate: $pass passed, $fail failed"
# The status of this final test is the script's exit code: 0 only when
# nothing failed.
[[ $fail -eq 0 ]]