W14: pin K19 MCP-harness/runtime primitive parity (test-only)
mcp_tree.ml's parallel primitive table drifted from sx_primitives.ml —
the spec-mandated harness verification path silently produced false
findings ((get {:a 1} :a 99) -> nil vs 1, char-class vs substring split,
etc.). dc7aa709 aligned 8 entries as a stopgap; the real fix (linking
sx_primitives) is hosts-lane.
Add scripts/test-harness-parity.sh: drives mcp_tree.exe sx_eval via raw
JSON-RPC and a fresh sx_server.exe via the epoch protocol, runs the
finding's 12-probe battery through both, fails on any divergence (errors
compared by inner message). 12/12 parity today — the stopgap holds and
can no longer rot silently.
Test-only: no semantics edits, no push.
Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
107
scripts/test-harness-parity.sh
Executable file
107
scripts/test-harness-parity.sh
Executable file
@@ -0,0 +1,107 @@
|
||||
#!/bin/bash
|
||||
# test-harness-parity.sh — W14 section-C pin for K19 (harness honesty).
|
||||
#
|
||||
# K19 (review, core.md): the MCP tree server (mcp_tree.ml) carries a
|
||||
# PARALLEL primitive table, and it drifted from the real runtime
|
||||
# (sx_primitives.ml) — e.g. (get {:a 1} :a 99) returned nil in the harness
|
||||
# but 1 in production, (split "a--b" "--") was char-class vs substring.
|
||||
# CLAUDE.md mandates harness verification, so drift silently produces
|
||||
# false findings/passes. dc7aa709 aligned 8 entries as a stopgap; the real
|
||||
# fix (mcp_tree links sx_primitives directly) is hosts-lane work.
|
||||
#
|
||||
# This pin runs the finding's exact probe battery through BOTH environments
|
||||
# — mcp_tree.exe sx_eval (JSON-RPC over stdio) and a fresh sx_server.exe
|
||||
# (epoch protocol) — and fails on ANY divergence. Errors are compared by
|
||||
# message, values by serialized form. Both subprocesses are fresh and
|
||||
# timeout-bounded; no shared process is touched.
|
||||
#
|
||||
# Exit: 0 = full parity; 1 = drift (harness lies about the runtime again);
#       2 = skipped (a required binary is not built).
|
||||
# -u: unset variables are errors; pipefail: a pipeline fails if any stage
# fails. -e is not enabled, so failures must be checked explicitly.
set -uo pipefail

# Work from the parent of this script's directory so the relative binary
# paths below resolve. Without the explicit check a failed cd would be
# ignored and the paths would be probed relative to the caller's cwd.
cd "$(dirname "$0")/.." || {
  echo "SKIP: cannot cd to $(dirname "$0")/.." >&2
  exit 2
}

MCP=hosts/ocaml/_build/default/bin/mcp_tree.exe
SERVER=hosts/ocaml/_build/default/bin/sx_server.exe

# Both binaries must be present and executable; otherwise report a skip on
# stderr and exit 2 (kept distinct from 1 = drift).
for bin in "$MCP" "$SERVER"; do
  if [[ ! -x "$bin" ]]; then
    echo "SKIP: $bin not built (run sx_build target=ocaml first)" >&2
    exit 2
  fi
done
|
||||
|
||||
python3 - "$MCP" "$SERVER" <<'PYEOF'
|
||||
import json, re, subprocess, sys
|
||||
|
||||
# Paths of the two binaries, taken from the first two command-line arguments.
MCP, SERVER = sys.argv[1], sys.argv[2]
|
||||
|
||||
# K19 probe battery — the finding's confirmed drift cases + stopgap entries.
# Each entry is one SX expression, evaluated verbatim by both environments.
PROBES = [
    # empty?
    '(empty? "")',
    '(empty? {})',
    # get
    '(get {:a 1} :a 99)',
    '(get {:a 1} :zz 99)',
    '(get (list 10 20) 1)',
    # split
    '(split "a--b" "--")',
    '(split "abc" "")',
    # equal?
    '(equal? (list 1 2) (list 1 2))',
    # contains? / keyword-name
    '(contains? {:a 1} :a)',
    '(keyword-name :kw)',
    # char-code / parse-number
    '(char-code "A")',
    '(parse-number "42")',
]
|
||||
|
||||
def norm_error(msg):
    """Reduce an error envelope to a comparable ``<ERROR> ...`` string.

    When *msg* contains ``Unhandled exception: "<inner>"`` (the quotes may be
    backslash-escaped), only ``<inner>`` is kept, so differently wrapped
    reports of the same failure compare equal. Otherwise the stripped
    message, cut to its first 80 characters, is used.
    """
    inner = re.search(r'Unhandled exception: \\?"(.*?)\\?"', msg)
    if inner is None:
        return f"<ERROR> {msg.strip()[:80]}"
    return f"<ERROR> {inner.group(1)}"
|
||||
|
||||
# --- harness side: mcp_tree sx_eval over JSON-RPC ---
# One JSON-RPC message per stdin line: the initialize request, the
# initialized notification, then one tools/call per probe. Probe i is sent
# with id 100 + i so replies can be mapped back to PROBES by id.
lines = [
    json.dumps({"jsonrpc": "2.0", "id": 1, "method": "initialize",
                "params": {"protocolVersion": "2024-11-05", "capabilities": {},
                           "clientInfo": {"name": "parity", "version": "0"}}}),
    json.dumps({"jsonrpc": "2.0", "method": "notifications/initialized"}),
]
for i, p in enumerate(PROBES):
    lines.append(json.dumps({"jsonrpc": "2.0", "id": 100 + i, "method": "tools/call",
                             "params": {"name": "sx_eval", "arguments": {"expr": p}}}))
out = subprocess.run(["timeout", "60", MCP], input="\n".join(lines) + "\n",
                     capture_output=True, text=True).stdout

# harness maps probe index -> result text (errors normalised by norm_error).
harness = {}
for line in out.splitlines():
    try:
        j = json.loads(line)
    except ValueError:
        continue  # not JSON at all: ignore
    # A stray line such as `42` or `"log"` is valid JSON but not a reply
    # object; skip it instead of crashing on .get().
    if not isinstance(j, dict):
        continue
    if isinstance(j.get("id"), int) and j["id"] >= 100:
        # Tolerate a missing/null result and a missing/empty content array:
        # all of these fall through to the "<none>" placeholder.
        result = j.get("result")
        if not isinstance(result, dict):
            result = {}
        content = result.get("content")
        first = content[0] if isinstance(content, list) and content else {}
        txt = first.get("text", "<none>") if isinstance(first, dict) else "<none>"
        if not isinstance(txt, str):
            txt = "<none>"
        txt = txt.strip()
        if txt.startswith("Error:") or result.get("isError"):
            txt = norm_error(txt)
        harness[j["id"] - 100] = txt
|
||||
|
||||
# --- server side: fresh sx_server over the epoch protocol ---
# Every probe is sent as an (epoch N) line followed by an (eval "...") line;
# epochs are 1-based, so probe i is sent under epoch i + 1.
commands = [
    form
    for epoch, probe in enumerate(PROBES, start=1)
    for form in (f"(epoch {epoch})", f"(eval {json.dumps(probe)})")
]
server_run = subprocess.run(
    ["timeout", "60", SERVER],
    input="\n".join(commands) + "\n",
    capture_output=True,
    text=True,
)

# server maps probe index -> result text; an entry stays None when an
# "(ok-len N ...)" header was seen but no following line supplied a value.
server = {}
pending = None  # epoch whose value line is expected next, if any
for row in server_run.stdout.splitlines():
    if row.startswith("(ok-len "):
        # Header line: the second token is the epoch; the value is taken
        # from the next line that is neither a header nor an error.
        pending = int(row.split()[1])
        server[pending - 1] = None
    elif row.startswith("(error "):
        epoch = int(row.split()[1])
        server[epoch - 1] = norm_error(row)
        pending = None
    elif pending is not None and server.get(pending - 1) is None:
        server[pending - 1] = row.strip()
        pending = None
|
||||
|
||||
# --- verdict: a probe passes only when both sides answered identically ---
# MISSING stands in for a side that produced no result for a probe (no reply
# at all, or a reply header with no value after it). It must never count as
# agreement: if both binaries crash or time out, every probe is missing on
# both sides, and comparing the placeholders for equality would report full
# parity and exit 0.
MISSING = "<missing>"
fails = 0
for i, p in enumerate(PROBES):
    h = harness.get(i)
    s = server.get(i)
    if h is None:
        h = MISSING
    if s is None:
        s = MISSING
    if h == s and h != MISSING:
        print(f"PASS: {p:40s} both -> {h!r}")
    else:
        print(f"FAIL: {p:40s} harness={h!r} server={s!r}")
        fails += 1

print()
print(f"harness-parity: {len(PROBES) - fails} passed, {fails} failed")
# Exit status: 1 if any probe failed, 0 otherwise.
sys.exit(1 if fails else 0)
|
||||
PYEOF
|
||||
Reference in New Issue
Block a user