All five protocol quirks are OPEN server-side, so the suite pins CURRENT
behavior (verified live) as a bidirectional ledger in
scripts/test-protocol-gate.sh:
- C3: stray (io-response ...) answered as Unknown command (dead guard)
- C4: malformed (epoch) errors and leaves the epoch stale (envelope
changed since the finding: the dc7aa709 guard answers rather than kills)
- C5: decreasing epoch accepted silently (no monotonic enforcement)
- C6: two commands on one line -> one error, neither executed
- C7: vm-trace without compiler -> opaque "Not callable: nil"
Plus the fuzz property that matters: 60 deterministically-seeded hostile
lines (unbalanced parens, control chars, unicode, 2KB lines, stray
io-responses, epoch mutations) followed by a well-formed command — the
server must still answer and exit cleanly. protocol-gate: 11/11.
When a server-side fix lands, the matching ledger pin fails loudly and the
ledger is updated to assert the corrected behavior.
Test-only: no semantics edits, no push.
Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
234 lines
8.9 KiB
Bash
Executable File
234 lines
8.9 KiB
Bash
Executable File
#!/bin/bash
# test-protocol-gate.sh — W14 pins for the epoch/command-channel protocol.
#
# Pins C1/C1b (review, plans/sx-review/hosts.md): a malformed or non-ASCII
# line on the top-level command channel used to raise an uncaught
# Sx_types.Parse_error and KILL the whole sx_server process (the shared
# channel used by bridges and conformance runners). Fixed in dc7aa709:
# the server now answers `(error N "Malformed command line: ...")` and
# keeps serving.
#
# Each case spawns its OWN timeout-bounded sx_server.exe subprocess —
# no shared/sibling process is ever touched. Designed to grow into the
# W14 section-E protocol fuzz suite (C3-C7).
#
# Usage: bash scripts/test-protocol-gate.sh
# Exit: 0 = all pins green; 1 = a pin failed (fix regressed);
#       2 = skipped (repo root unreachable or server binary not built).

# -e is deliberately not enabled: the cases in this script capture exit
# statuses themselves and count failures instead of aborting on the first.
set -uo pipefail

# All paths below are relative to the repository root (parent of scripts/).
cd "$(dirname "$0")/.." || { echo "SKIP: cannot cd to repo root" >&2; exit 2; }
SERVER=hosts/ocaml/_build/default/bin/sx_server.exe

if [[ ! -x "$SERVER" ]]; then
  echo "SKIP: $SERVER not built (run sx_build target=ocaml first)" >&2
  exit 2
fi

# Running tallies, incremented by every case and reported at the end.
pass=0
fail=0

# run_case NAME INPUT EXPECT_SENTINEL
#   Feeds INPUT to a fresh server (INPUT goes through printf '%b', so \n and
#   \xHH escapes are expanded). Asserts:
#     1. an (error ... "Malformed command line: ...") response is emitted
#     2. the follow-up epoch still evaluates (some output line is exactly
#        EXPECT_SENTINEL)
#     3. the process exits cleanly (no Fatal error, exit 0 on stdin EOF)
#   Globals: SERVER (read); pass, fail (one of them is incremented).
#   Outputs: a PASS/FAIL line per case, plus the captured output on failure.
run_case() {
  local name="$1" input="$2" sentinel="$3"
  local out rc
  # stderr is folded into stdout so a top-level "Fatal error" is visible.
  out=$(printf '%b' "$input" | timeout 60 "$SERVER" 2>&1)
  rc=$?
  local ok=1

  if ! grep -q 'Malformed command line' <<<"$out"; then
    echo "FAIL: $name — no malformed-line error response"; ok=0
  fi
  # -x -F: the sentinel is matched as a literal whole line, so characters
  # such as '.' or '*' in it are not interpreted as regex operators.
  if ! grep -qxF -- "$sentinel" <<<"$out"; then
    echo "FAIL: $name — follow-up epoch did not run (process died?)"; ok=0
  fi
  if grep -q 'Fatal error' <<<"$out"; then
    echo "FAIL: $name — Fatal error escaped to the top level"; ok=0
  fi
  if [[ $rc -ne 0 ]]; then
    echo "FAIL: $name — nonzero exit ($rc)"; ok=0
  fi

  if [[ $ok -eq 1 ]]; then
    echo "PASS: $name"
    pass=$((pass+1))
  else
    echo " --- output ---"; sed 's/^/ /' <<<"$out"; echo " --------------"
    fail=$((fail+1))
  fi
}

# C1: unterminated list on the command channel (exact review repro)
run_case "C1 unterminated list survives" \
  '(epoch 2)\n(eval "(+ 1 2"\n(epoch 3)\n(eval "99")\n' \
  '99'

# C1: plain-garbage line (second C1 repro shape)
run_case "C1 garbage line survives" \
  '(epoch 1)\nnot an s-expr ]]] {{{\n(epoch 2)\n(eval "42")\n' \
  '42'

# C1b: non-ASCII byte on the command channel (exact review repro; \xc3\xa9 = é)
run_case "C1b non-ASCII line survives" \
  '(epoch 1)\n(eval (quote caf\xc3\xa9))\n(epoch 2)\n(eval "99")\n' \
  '99'

# Control: a well-formed session still works end to end. Only the evaluated
# result line is checked here; the exit status is not asserted.
ctrl=$(printf '(epoch 1)\n(eval "(+ 40 2)")\n' | timeout 60 "$SERVER" 2>&1)
if grep -q '^42$' <<<"$ctrl"; then
  echo "PASS: control well-formed session"
  pass=$((pass+1))
else
  echo "FAIL: control well-formed session"; sed 's/^/ /' <<<"$ctrl"
  fail=$((fail+1))
fi

# ---------------------------------------------------------------------------
# C3–C7 protocol-quirk LEDGER (hosts.md, all OPEN server-side). These pin
# CURRENT behavior, verified live 2026-07-04 — they are documentation, not
# endorsement. When a server fix lands and a pin fails, update the ledger
# to assert the corrected behavior (bidirectional, like test-env-parity.sh).
# ---------------------------------------------------------------------------

# ledger_case NAME INPUT GREP_MUST GREP_MUST2
#   Feeds INPUT (expanded by printf '%b') to a fresh server and requires the
#   combined stdout+stderr to match GREP_MUST and, when given, GREP_MUST2.
#   Both are grep basic regular expressions. Also fails if "Fatal error"
#   appears in the output.
#   The server's exit status is reported in the failure dump but is NOT
#   asserted: the ledger pins whatever the server currently does.
#   Globals: SERVER (read); pass, fail (one of them is incremented).
ledger_case() {
  local name="$1" input="$2" must="$3" must2="${4:-}"
  local out rc
  out=$(printf '%b' "$input" | timeout 60 "$SERVER" 2>&1)
  rc=$?
  local ok=1 pat
  for pat in "$must" "$must2"; do
    # An empty pattern means "no expectation" (it would match anything).
    [[ -n "$pat" ]] || continue
    grep -q -- "$pat" <<<"$out" || { echo "FAIL: $name — expected: $pat"; ok=0; }
  done
  if grep -q 'Fatal error' <<<"$out"; then
    echo "FAIL: $name — process died"; ok=0
  fi
  if [[ $ok -eq 1 ]]; then
    echo "PASS: $name"
    pass=$((pass+1))
  else
    echo " --- output (exit $rc) ---"; sed 's/^/ /' <<<"$out"; echo " --------------"
    fail=$((fail+1))
  fi
}

# C3: stray (io-response ...) is answered as Unknown command (dead guard) —
# an EXTRA response the client didn't ask for; process keeps serving.
ledger_case "C3 ledger: stray io-response gets an extra error reply" \
  '(epoch 1)\n(io-response 1 42)\n(eval "5")\n' \
  'Unknown command: (io-response 1 42)' '^5$'

# C4: malformed (epoch) doesn't update the epoch — next reply tagged with
# the OLD epoch (0 here), i.e. stale from the client's viewpoint.
ledger_case "C4 ledger: malformed epoch marker leaves epoch stale" \
  '(epoch)\n(eval "2")\n' \
  '(ok-len 0 1)' '^2$'

# C5: no monotonic-epoch enforcement — a decreasing epoch is accepted.
ledger_case "C5 ledger: decreasing epoch accepted silently" \
  '(epoch 9)\n(epoch 3)\n(eval "42")\n' \
  '(ok-len 3 2)' '^42$'

# C6: two commands on one line -> one error, NEITHER executed.
ledger_case "C6 ledger: two commands on one line both dropped" \
  '(epoch 1)\n(eval "1") (eval "2")\n(eval "3")\n' \
  'Expected single command, got 2' '^3$'

# C7: vm-trace without the compiler loaded errors opaquely.
ledger_case "C7 ledger: vm-trace sans compiler is opaque Not-callable-nil" \
  '(epoch 1)\n(vm-trace "(+ 1 2)")\n' \
  'Not callable: nil'

# ---------------------------------------------------------------------------
# Fuzz-liveness property: after 60 deterministic hostile lines (unbalanced
# parens, control chars, unicode, long lines, stray io-responses, epoch
# mutations), the server must still answer a well-formed command and exit
# cleanly. Seeded PRNG — reproducible corpus.
# ---------------------------------------------------------------------------

# fuzz_corpus — print the hostile corpus on stdout, one entry per line.
#   PYTHONIOENCODING pins the output to UTF-8 so the bytes sent to the server
#   do not vary with the caller's locale.
#   Returns: python3's exit status (nonzero if python3 is missing or errors).
fuzz_corpus() {
  PYTHONIOENCODING=utf-8 python3 - <<'PY'
import random
r = random.Random(1404)
lines = []
frag = ['(', ')', '((', '))', '(eval', '(epoch', 'io-response', '"', '\\',
        'café', '\x01', '\x1b[2J', ':kw', '{', '}', '(+ 1', 'nil)', '#|', '|#']
for i in range(60):
    kind = r.randrange(5)
    if kind == 0:
        lines.append(''.join(r.choice(frag) for _ in range(r.randrange(1, 8))))
    elif kind == 1:
        lines.append('(epoch ' + r.choice(['', 'foo', '-1', '999999999999999999999', ')']) + ')')
    elif kind == 2:
        lines.append('(io-response %d %s' % (r.randrange(99), r.choice([')', '', '42']) ))
    elif kind == 3:
        lines.append('x' * r.randrange(200, 2000))
    else:
        lines.append('(eval "' + r.choice(['(+ 1', '(list', '\\\\', '((((']) + '")')
print('\n'.join(lines))
PY
}

# Guard against a vacuous pass: if the generator fails, the corpus would be
# empty and the server would trivially "survive" zero hostile lines.
fuzz_lines=0
if fuzz=$(fuzz_corpus); then
  # The here-string re-adds the newline that $(...) stripped from the last line.
  fuzz_lines=$(wc -l <<<"$fuzz")
fi

if [[ "$fuzz_lines" -ne 60 ]]; then
  echo "FAIL: fuzz-liveness — corpus generation failed (python3 missing or errored; got $fuzz_lines lines)"
  fail=$((fail+1))
else
  out=$(printf '%s\n(epoch 777)\n(eval "\\"alive\\"")\n' "$fuzz" | timeout 90 "$SERVER" 2>&1)
  rc=$?
  if grep -q '^"alive"$' <<<"$out" && ! grep -q 'Fatal error' <<<"$out" && [[ $rc -eq 0 ]]; then
    echo "PASS: fuzz-liveness — server survives 60 hostile lines and still answers"
    pass=$((pass+1))
  else
    echo "FAIL: fuzz-liveness (rc=$rc)"; tail -6 <<<"$out" | sed 's/^/ /'
    fail=$((fail+1))
  fi
fi

# ---------------------------------------------------------------------------
# S4 (review, hosts.md): soft error pages must NOT be stored in the HTTP
# response cache. Pre-fix, a routing-failure page was cached as HTTP 200 and
# served byte-identically from cache to every later visitor (cold 2s → warm
# 0.0005s, ONE render line). Post-fix (dc7aa709), http_render_page returns
# (html, is_error) and cache insertion is gated on `not is_err` (the skip is
# logged as "[cache] <path> → error page, not cached").
#
# Pin: GET the same nonexistent path twice against a fresh --http server and
# assert BOTH requests re-render (two [sx-http] render lines) plus the
# is_err gate line appearing in the log. NB: in a standalone worktree all
# docs pages render as soft error pages (no content), so a positive
# "real page IS cached" control is not assertable here.
# ---------------------------------------------------------------------------

# s4_case (no arguments)
#   Starts "$SERVER" --http on a random port in 18000-19999 with its output
#   captured to a temp log, waits up to 40 polls for it to answer, requests
#   one nonexistent path twice, then inspects the log.
#   Globals: SERVER (read); pass, fail (one of them is incremented).
s4_case() {
  local port=$((18000 + RANDOM % 2000))
  local log
  if ! log=$(mktemp); then
    echo "FAIL: S4 — could not create a temp log file"
    fail=$((fail+1)); return
  fi
  timeout 90 "$SERVER" --http "$port" >"$log" 2>&1 &
  local srv=$!   # PID of the timeout wrapper, which forwards TERM to the server

  local up=0 i
  for ((i = 0; i < 40; i++)); do
    # If our server is already gone, stop polling: waiting longer is useless,
    # and an answer on this port would have to come from some other process.
    kill -0 "$srv" 2>/dev/null || break
    # No -f: any HTTP response, even an error page, proves the server is up.
    if curl -s -o /dev/null "http://localhost:$port/" 2>/dev/null; then up=1; break; fi
    sleep 1
  done
  if [[ $up -ne 1 ]]; then
    echo "FAIL: S4 — http server did not come up on :$port"
    echo " --- log tail ---"; tail -12 "$log" | sed 's/^/ /'; echo " ---------------"
    kill "$srv" 2>/dev/null; wait "$srv" 2>/dev/null
    rm -f "$log"
    fail=$((fail+1)); return
  fi

  # Unique per run, so no earlier request can have populated a cache entry.
  local miss="/sx/gate-pin-missing-$$-$RANDOM"
  curl -s -o /dev/null "http://localhost:$port$miss"
  curl -s -o /dev/null "http://localhost:$port$miss"
  sleep 1   # give the server a moment to write its log lines

  local renders
  # -F: the path is matched literally; the trailing space anchors the end of it.
  renders=$(grep -cF -- "sx-http] $miss " "$log")
  local ok=1
  if [[ "$renders" -ne 2 ]]; then
    echo "FAIL: S4 — expected 2 renders of $miss (not cache-served), got $renders"
    ok=0
  fi
  if ! grep -q 'error page, not cached' "$log"; then
    echo "FAIL: S4 — is_err cache gate line absent from server log"
    ok=0
  fi

  if [[ $ok -eq 1 ]]; then
    echo "PASS: S4 soft error page not cached (both GETs re-rendered)"
    pass=$((pass+1))
  else
    echo " --- log tail ---"; tail -12 "$log" | sed 's/^/ /'; echo " ---------------"
    fail=$((fail+1))
  fi

  # Stop the server and reap it before removing the log it writes to.
  kill "$srv" 2>/dev/null; wait "$srv" 2>/dev/null
  rm -f "$log"
}

s4_case

echo
echo "protocol-gate: $pass passed, $fail failed"
# Last command: its status is the script's exit status (0 only if nothing failed).
[[ $fail -eq 0 ]]