diff --git a/plans/agent-briefings/sx-gate-loop.md b/plans/agent-briefings/sx-gate-loop.md index 5598673c..a48620d3 100644 --- a/plans/agent-briefings/sx-gate-loop.md +++ b/plans/agent-briefings/sx-gate-loop.md @@ -91,7 +91,9 @@ Pin each confirmed-and-fixed finding with a minimal repro. Add suites to sx-build-all.sh left as maintainer call (gate definition D3) ### E. Epoch-loop protocol fuzz + skip-list -- [ ] C3/C4/C5/C6/C7 — epoch protocol fuzz suite +- [x] C3/C4/C5/C6/C7 — protocol-quirk ledger (pins current behavior, + bidirectional) + seeded 60-line fuzz-liveness property in + `scripts/test-protocol-gate.sh` (11/11) - [ ] F10 — hs-upstream skip-list so browser-only FAILs mean something - [ ] C9 — empty suite label @@ -100,6 +102,20 @@ Pin each confirmed-and-fixed finding with a minimal repro. Add suites to ## Progress log (newest first) +- 2026-07-04 — **C3–C7 protocol fuzz suite (item E.1)**. All five findings + are still OPEN server-side (sx_server.ml fixes are host-runtime work), + so the suite pins CURRENT behavior as a bidirectional ledger — verified + each live first: C3 stray io-response → extra Unknown-command reply + (dead 13-vs-14-char guard); C4 malformed (epoch) → error reply + stale + epoch tag (envelope changed since the finding: dc7aa709's guard now + answers rather than ignores); C5 decreasing epoch accepted; C6 two + commands one line → one error, neither runs; C7 vm-trace sans compiler → + opaque "Not callable: nil". Plus a real fuzz property: 60 + deterministically-seeded hostile lines (unbalanced parens, control chars, + unicode, 2KB lines, stray io-responses, epoch mutations) then a + well-formed command — server must still answer and exit cleanly. + protocol-gate now 11/11. When a server fix lands, the matching ledger + pin fails loudly → update to assert the corrected behavior. Test-only. - 2026-07-04 — **F2 WASM corpus runner (section D COMPLETE)**. 
The review's headline conformance gap: no runner ever fed spec/tests through the SHIPPED browser artifact (F-1/F-3 divergences existed undetected). Built
diff --git a/scripts/test-protocol-gate.sh b/scripts/test-protocol-gate.sh
index 798fcab8..12eb8cbf 100755
--- a/scripts/test-protocol-gate.sh
+++ b/scripts/test-protocol-gate.sh
@@ -84,6 +84,108 @@ else
   fail=$((fail+1))
 fi
 
+# ---------------------------------------------------------------------------
+# C3–C7 protocol-quirk LEDGER (hosts.md, all OPEN server-side). These pin
+# CURRENT behavior, verified live 2026-07-04 — they are documentation, not
+# endorsement. When a server fix lands and a pin fails, update the ledger
+# to assert the corrected behavior (bidirectional, like test-env-parity.sh).
+# ---------------------------------------------------------------------------
+
+# ledger_case NAME INPUT GREP_MUST [GREP_MUST2]
+#
+# Pipe INPUT into "$SERVER" and pin what it prints (stdout and stderr).
+#   NAME        label printed in the PASS/FAIL lines
+#   INPUT       protocol text; expanded by printf '%b', so \n is a newline
+#   GREP_MUST   grep basic-regex pattern that must match the output
+#   GREP_MUST2  optional second pattern; skipped when empty or omitted
+# The case also fails if the output contains 'Fatal error', or if timeout(1)
+# had to kill the server (exit 124): the patterns alone cannot tell a server
+# that hung after replying from one that replied and exited.
+# Increments the script-level pass/fail counters; on failure the captured
+# output is printed for diagnosis.
+ledger_case() {
+  local name="$1" input="$2" must="$3" must2="${4:-}"
+  local out rc=0 ok=1
+  # '|| rc=$?' keeps the exit status without tripping errexit, if enabled.
+  out=$(printf '%b' "$input" | timeout 60 "$SERVER" 2>&1) || rc=$?
+  grep -q -- "$must" <<<"$out" || { echo "FAIL: $name — expected: $must"; ok=0; }
+  if [[ -n "$must2" ]]; then
+    grep -q -- "$must2" <<<"$out" || { echo "FAIL: $name — expected: $must2"; ok=0; }
+  fi
+  if grep -q 'Fatal error' <<<"$out"; then
+    echo "FAIL: $name — process died"; ok=0
+  fi
+  if [[ $rc -eq 124 ]]; then
+    echo "FAIL: $name — timed out after 60s"; ok=0
+  fi
+  if [[ $ok -eq 1 ]]; then echo "PASS: $name"; pass=$((pass+1));
+  else echo " --- output ---"; sed 's/^/ /' <<<"$out"; fail=$((fail+1)); fi
+}
+
+# C3: stray (io-response ...) is answered as Unknown command (dead guard) —
+# an EXTRA response the client didn't ask for; process keeps serving.
+ledger_case "C3 ledger: stray io-response gets an extra error reply" \
+  '(epoch 1)\n(io-response 1 42)\n(eval "5")\n' \
+  'Unknown command: (io-response 1 42)' '^5$'
+
+# C4: malformed (epoch) doesn't update the epoch — next reply tagged with
+# the OLD epoch (0 here), i.e. stale from the client's viewpoint.
+ledger_case \
+  "C4 ledger: malformed epoch marker leaves epoch stale" \
+  '(epoch)\n(eval "2")\n' '(ok-len 0 1)' '^2$'
+
+# C5: epochs need not increase — (epoch 3) sent after (epoch 9) is accepted.
+ledger_case \
+  "C5 ledger: decreasing epoch accepted silently" \
+  '(epoch 9)\n(epoch 3)\n(eval "42")\n' '(ok-len 3 2)' '^42$'
+
+# C6: a line holding two commands yields a single error; neither one runs.
+ledger_case \
+  "C6 ledger: two commands on one line both dropped" \
+  '(epoch 1)\n(eval "1") (eval "2")\n(eval "3")\n' 'Expected single command, got 2' '^3$'
+
+# C7: with no compiler loaded, vm-trace fails with an opaque error message.
+ledger_case \
+  "C7 ledger: vm-trace sans compiler is opaque Not-callable-nil" \
+  '(epoch 1)\n(vm-trace "(+ 1 2)")\n' 'Not callable: nil'
+
+# ---------------------------------------------------------------------------
+# Fuzz-liveness property: feed 60 hostile lines (unbalanced parens, control
+# chars, unicode, long lines, stray io-responses, epoch mutations) drawn from
+# a seeded PRNG — so the corpus is reproducible — then one well-formed
+# command. The server must still answer it and exit cleanly.
+# ---------------------------------------------------------------------------
+fuzz=$(python3 - <<'PY'
+import random
+r = random.Random(1404)  # fixed seed: every run replays the same corpus
+lines = []
+frag = ['(', ')', '((', '))', '(eval', '(epoch', 'io-response', '"', '\\',
+        'café', '\x01', '\x1b[2J', ':kw', '{', '}', '(+ 1', 'nil)', '#|', '|#']
+for i in range(60):
+    kind = r.randrange(5)  # one of five line shapes
+    if kind == 0:  # 1-7 random fragments glued together
+        lines.append(''.join(r.choice(frag) for _ in range(r.randrange(1, 8))))
+    elif kind == 1:  # epoch marker mutations
+        lines.append('(epoch ' + r.choice(['', 'foo', '-1', '999999999999999999999', ')']) + ')')
+    elif kind == 2:  # io-response, sometimes left unclosed
+        lines.append('(io-response %d %s' % (r.randrange(99), r.choice([')', '', '42']) ))
+    elif kind == 3:  # a run of 200-1999 x characters
+        lines.append('x' * r.randrange(200, 2000))
+    else:  # eval command wrapping an awkward payload
+        lines.append('(eval "' + r.choice(['(+ 1', '(list', '\\\\', '((((']) + '")')
+print('\n'.join(lines))
+PY
+) || fuzz=''  # python3 missing or crashed: leave the corpus empty, the count check fails the test
+corpus_lines=$(wc -l <<<"$fuzz")  # must be 60; otherwise the property would pass with no hostile input
+rc=0; out=$(printf '%s\n(epoch 777)\n(eval "\\"alive\\"")\n' "$fuzz" | timeout 90 "$SERVER" 2>&1) || rc=$?
+if [[ $corpus_lines -eq 60 && $rc -eq 0 ]] && grep -q '^"alive"$' <<<"$out" && ! grep -q 'Fatal error' <<<"$out"; then
+  echo "PASS: fuzz-liveness — server survives 60 hostile lines and still answers"
+  pass=$((pass+1))
+else
+  echo "FAIL: fuzz-liveness (rc=$rc, corpus_lines=$corpus_lines)"; tail -6 <<<"$out" | sed 's/^/ /'
+  fail=$((fail+1))
+fi
+
 # ---------------------------------------------------------------------------
 # S4 (review, hosts.md): soft error pages must NOT be stored in the HTTP
 # response cache. Pre-fix, a routing-failure page was cached as HTTP 200 and