#!/bin/bash
# test-harness-parity.sh — W14 section-C pin for K19 (harness honesty).
#
# K19 (review, core.md): the MCP tree server (mcp_tree.ml) carries a
# PARALLEL primitive table, and it drifted from the real runtime
# (sx_primitives.ml) — e.g. (get {:a 1} :a 99) returned nil in the harness
# but 1 in production, (split "a--b" "--") was char-class vs substring.
# CLAUDE.md mandates harness verification, so drift silently produces
# false findings/passes. dc7aa709 aligned 8 entries as a stopgap; the real
# fix (mcp_tree links sx_primitives directly) is hosts-lane work.
#
# This pin runs the finding's exact probe battery through BOTH environments
# — mcp_tree.exe sx_eval (JSON-RPC over stdio) and a fresh sx_server.exe
# (epoch protocol) — and fails on ANY divergence. Errors are compared by
# message, values by serialized form. Both subprocesses are fresh and
# timeout-bounded; no shared process is touched. A probe for which either
# side produced no result at all counts as a failure, never as parity.
#
# Exit: 0 = full parity; 1 = drift (harness lies about the runtime again);
#       2 = skipped (a binary is not built, or the repo root is unreachable).
set -uo pipefail

# Binary paths below are relative to the repository root (parent of this
# script's directory); bail out rather than probe from the wrong directory.
cd "$(dirname "$0")/.." || {
  echo "SKIP: cannot cd to repository root from $0" >&2
  exit 2
}

MCP=hosts/ocaml/_build/default/bin/mcp_tree.exe
SERVER=hosts/ocaml/_build/default/bin/sx_server.exe

for bin in "$MCP" "$SERVER"; do
  if [[ ! -x "$bin" ]]; then
    echo "SKIP: $bin not built (run sx_build target=ocaml first)" >&2
    exit 2
  fi
done

# The quoted heredoc delimiter keeps the shell from expanding anything in
# the Python source; python3's exit status becomes the script's exit status.
python3 - "$MCP" "$SERVER" <<'PYEOF'
import json, re, subprocess, sys

MCP, SERVER = sys.argv[1], sys.argv[2]

# K19 probe battery — the finding's confirmed drift cases + stopgap entries.
PROBES = [
    '(empty? "")',
    '(empty? {})',
    '(get {:a 1} :a 99)',
    '(get {:a 1} :zz 99)',
    '(get (list 10 20) 1)',
    '(split "a--b" "--")',
    '(split "abc" "")',
    '(equal? (list 1 2) (list 1 2))',
    '(contains? {:a 1} :a)',
    '(keyword-name :kw)',
    '(char-code "A")',
    '(parse-number "42")',
]


def norm_error(msg):
    """Extract the quoted inner error message so harness/server error
    envelopes compare equal when the underlying failure is the same.

    The result always starts with a space. Ordinary values are stripped
    before being stored, so a leading space marks the entry as an error.
    Without a recognisable envelope, the first 80 characters of the
    stripped message are used instead.
    """
    m = re.search(r'Unhandled exception: \\?"(.*?)\\?"', msg)
    if m:
        return " " + m.group(1)
    return " " + msg.strip()[:80]


def run_bounded(binary, payload):
    """Feed `payload` to a fresh `binary` under `timeout 60`; return stdout.

    A timeout is reported on stderr (timeout(1) exits 124 in that case) so
    that truncated output is not mistaken for a clean run.
    """
    proc = subprocess.run(["timeout", "60", binary], input=payload,
                          capture_output=True, text=True)
    if proc.returncode == 124:
        print(f"WARN: {binary} timed out after 60s; results may be incomplete",
              file=sys.stderr)
    return proc.stdout


# --- harness side: mcp_tree sx_eval over JSON-RPC ---
lines = [
    json.dumps({"jsonrpc": "2.0", "id": 1, "method": "initialize",
                "params": {"protocolVersion": "2024-11-05",
                           "capabilities": {},
                           "clientInfo": {"name": "parity", "version": "0"}}}),
    json.dumps({"jsonrpc": "2.0", "method": "notifications/initialized"}),
]
# Probe i is sent with request id 100 + i so replies map back to PROBES.
for i, p in enumerate(PROBES):
    lines.append(json.dumps({"jsonrpc": "2.0", "id": 100 + i,
                             "method": "tools/call",
                             "params": {"name": "sx_eval",
                                        "arguments": {"expr": p}}}))

harness = {}
for l in run_bounded(MCP, "\n".join(lines) + "\n").splitlines():
    try:
        j = json.loads(l)
    except ValueError:
        continue  # not a JSON line; ignore
    if not isinstance(j, dict):
        continue  # valid JSON but not a response object
    if isinstance(j.get("id"), int) and j["id"] >= 100:
        # Tolerate a null/absent result and an empty content list instead
        # of crashing; such replies are recorded as an empty string.
        result = j.get("result") or {}
        content = result.get("content") or [{}]
        txt = content[0].get("text", "").strip()
        if txt.startswith("Error:") or result.get("isError"):
            txt = norm_error(txt)
        harness[j["id"] - 100] = txt

# --- server side: fresh sx_server over the epoch protocol ---
# Probe i is sent as epoch i + 1, followed by an eval of the probe text.
inp = []
for i, p in enumerate(PROBES):
    inp.append(f"(epoch {i + 1})")
    inp.append(f"(eval {json.dumps(p)})")

server, cur = {}, None
for l in run_bounded(SERVER, "\n".join(inp) + "\n").splitlines():
    if l.startswith("(ok-len "):
        # Second token is the epoch; the next line is taken as its value.
        cur = int(l.split()[1])
        server[cur - 1] = None
    elif l.startswith("(error "):
        idx = int(l.split()[1])
        server[idx - 1] = norm_error(l)
        cur = None
    elif cur is not None and server.get(cur - 1) is None:
        server[cur - 1] = l.strip()
        cur = None

fails = 0
for i, p in enumerate(PROBES):
    h = harness.get(i)
    s = server.get(i)
    if h is None or s is None:
        # No result from at least one side: this must never count as
        # parity, otherwise two dead subprocesses would "agree" everywhere.
        print(f"FAIL: {p:40s} harness={h!r} server={s!r} (missing result)")
        fails += 1
    elif h == s:
        print(f"PASS: {p:40s} both -> {h!r}")
    else:
        print(f"FAIL: {p:40s} harness={h!r} server={s!r}")
        fails += 1

print()
print(f"harness-parity: {len(PROBES) - fails} passed, {fails} failed")
sys.exit(1 if fails else 0)
PYEOF