Files
rose-ash/tools/mcp_hs_test.py
giles a3abe47286 HS: fix test-456 timeout + add sx_kernel_eval/hs_compile_inspect/hs_parse_inspect tools
- hs-run-filtered.js: add collectionExpressions to _NO_STEP_LIMIT_SUITES (fixes state
  corruption for downstream for-loop tests), add repeat-forever tests to _NO_STEP_LIMIT,
  extend slow deadline for collectionExpressions to 60s
- tests/hs-kernel-eval.js: new standalone Node.js eval script — full WASM kernel +
  mock DOM, accepts HS_EVAL_EXPR/MODE/SETUP/FILES env vars, supports eval/compile/parse modes
- tools/mcp_hs_test.py: add sx_kernel_eval, hs_compile_inspect, hs_parse_inspect tools
- hosts/ocaml/bin/mcp_tree.ml: add host_stubs param to sx_harness_eval (OCaml build pending)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-02 17:45:12 +00:00

497 lines
18 KiB
Python
Executable File

#!/usr/bin/env python3
"""MCP server for hyperscript conformance test runs.
Wraps `node tests/hs-run-filtered.js` so the agent can run the suite (full
or filtered), regenerate the SX test file, and kill stale background runs
without going through Bash for each step.
Stdio JSON-RPC transport, same protocol as tools/mcp_services.py.
"""
import json
import os
import re
import signal
import subprocess
import sys
import time
# Repository root, taken as two directory levels above this file.
_THIS_FILE = os.path.abspath(__file__)
PROJECT_DIR = os.path.dirname(os.path.dirname(_THIS_FILE))

# Scripts this server shells out to, resolved against the repository root.
RUNNER_PATH = os.path.join(PROJECT_DIR, "tests/hs-run-filtered.js")
GEN_PATH = os.path.join(PROJECT_DIR, "tests/playwright/generate-sx-tests.py")
EVAL_PATH = os.path.join(PROJECT_DIR, "tests/hs-kernel-eval.js")
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def text_result(s):
    """Wrap *s* as a tool result holding a single text content item."""
    item = {"type": "text", "text": s}
    return {"content": [item]}
def error_result(s):
    """Wrap *s* as a tool result flagged as an error (``isError`` is True)."""
    payload = {"content": [{"type": "text", "text": s}]}
    payload["isError"] = True
    return payload
def find_runner_pids():
    """Return PIDs of running ``node ... hs-run-filtered.js`` processes.

    Uses ``pgrep -f``, which matches a regular expression against each
    process's full command line. The pattern allows anything between ``node``
    and the script name, so it finds runners started with a relative path
    (``node tests/hs-run-filtered.js``) as well as ones started with an
    absolute path, which is how ``hs_test_run`` launches them.

    Returns:
        A list of integer PIDs; empty when nothing matches or when pgrep
        cannot be run (missing binary, timeout).
    """
    # The dot is escaped so only a literal ".js" suffix matches.
    pattern = r"node .*hs-run-filtered\.js"
    try:
        completed = subprocess.run(
            ["pgrep", "-f", pattern],
            capture_output=True, text=True, timeout=5,
        )
    except (OSError, subprocess.SubprocessError):
        # Best effort: no pgrep (or a hung one) means "nothing found".
        return []
    return [int(tok) for tok in completed.stdout.split() if tok.isdigit()]
# ---------------------------------------------------------------------------
# Tool: hs_test_run
# ---------------------------------------------------------------------------
def _signal_group(pgid, sig):
    """Send *sig* to process group *pgid*; a group that is already gone is fine."""
    try:
        os.killpg(pgid, sig)
    except ProcessLookupError:
        pass


def _stop_runner(proc):
    """Kill the runner's whole process group and return the output captured so far.

    SIGTERM first; if the process has not exited and closed its output pipe
    within 2s, SIGKILL the group. Escalation is keyed on the pipe closing
    rather than on the group leader's exit status, so descendants that outlive
    the leader are still killed.
    """
    _signal_group(proc.pid, signal.SIGTERM)
    try:
        return proc.communicate(timeout=2)[0]
    except subprocess.TimeoutExpired:
        pass
    _signal_group(proc.pid, signal.SIGKILL)
    try:
        return proc.communicate(timeout=5)[0]
    except subprocess.TimeoutExpired as exc:
        # Something still holds the pipe open; salvage what was read.
        partial = exc.output
        if isinstance(partial, bytes):
            # The exception carries raw bytes even when the pipe is in text mode.
            partial = partial.decode("utf-8", errors="replace")
        return partial or ""


def _last_progress_line(output):
    """Return the most recent SLOW/TIMEOUT/progress line in *output*, or ""."""
    for line in reversed(output.splitlines()):
        # NOTE(review): "/1496 " presumably matches the runner's "index/total"
        # progress marker; the total is hard-coded and will go stale if the
        # number of tests changes -- confirm against the runner's output.
        if line.startswith((" SLOW:", " TIMEOUT:")) or "/1496 " in line:
            return line.strip()
    return ""


def _trim_to_summary(output):
    """Keep everything from the "Results:" line onward, plus earlier SLOW/TIMEOUT lines.

    Falls back to the untrimmed output when nothing would be kept.
    """
    kept = []
    in_summary = False
    for line in output.splitlines():
        if line.startswith("Results:"):
            in_summary = True
        # SLOW/TIMEOUT lines help debug hangs even in summary-only mode.
        if in_summary or line.startswith((" TIMEOUT:", " SLOW:")):
            kept.append(line)
    return "\n".join(kept) or output


def hs_test_run(args):
    """Run the HS test suite (optionally filtered) with a wall-clock timeout.

    The runner is started in its own session/process group so that a timeout
    can kill the whole process tree.

    Args:
        suite: HS_SUITE filter (e.g. "hs-upstream-put"). Optional.
        start: HS_START — first test index. Optional.
        end: HS_END — exclusive end test index. Optional.
        step_limit: HS_STEP_LIMIT (default 200000).
        timeout_secs: wall-clock cap (default 300, clamped to 10..1800).
        summary_only: if true (default), strip per-test output and keep
            only Results / By category / Failure types / All failures.
        verbose: if true, set HS_VERBOSE=1.

    Returns:
        A text result with a short header (elapsed time, timeout notice,
        active filters) followed by the runner output, or an error result
        when the runner script is missing.
    """
    if not os.path.isfile(RUNNER_PATH):
        return error_result(f"Runner not found at {RUNNER_PATH}")

    env = os.environ.copy()
    if args.get("suite"):
        env["HS_SUITE"] = str(args["suite"])
    for arg_name, env_name in (
        ("start", "HS_START"),
        ("end", "HS_END"),
        ("step_limit", "HS_STEP_LIMIT"),
    ):
        if args.get(arg_name) is not None:
            env[env_name] = str(int(args[arg_name]))
    if args.get("verbose"):
        env["HS_VERBOSE"] = "1"

    # An explicit null is treated like an omitted value, as for start/end above.
    requested = args.get("timeout_secs")
    timeout = max(10, min(int(300 if requested is None else requested), 1800))
    summary_only = args.get("summary_only", True)

    t0 = time.monotonic()
    proc = subprocess.Popen(
        ["node", RUNNER_PATH],
        cwd=PROJECT_DIR,
        env=env,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        # New session => new process group whose id equals proc.pid, so
        # killpg(proc.pid, ...) reaches the whole tree. This is the
        # thread-safe replacement for preexec_fn=os.setsid.
        start_new_session=True,
    )
    timed_out = False
    try:
        stdout, _ = proc.communicate(timeout=timeout)
    except subprocess.TimeoutExpired:
        timed_out = True
        stdout = _stop_runner(proc)
    elapsed = time.monotonic() - t0
    stdout = stdout or ""

    # On timeout, tell the agent where the run was when it got cut off.
    last_progress = _last_progress_line(stdout) if timed_out else ""
    if summary_only:
        stdout = _trim_to_summary(stdout)

    header = [f"## hs_test_run ({elapsed:.1f}s)"]
    if timed_out:
        header.append(f"⚠️ TIMED OUT after {timeout}s")
        if last_progress:
            header.append(f"Last activity: {last_progress}")
    if env.get("HS_SUITE"):
        header.append(f"suite: {env['HS_SUITE']}")
    if env.get("HS_START") or env.get("HS_END"):
        header.append(f"range: {env.get('HS_START', '0')}-{env.get('HS_END', 'end')}")
    header.append(f"step_limit: {env.get('HS_STEP_LIMIT', '200000')}")
    return text_result("\n".join(header) + "\n\n" + (stdout or "(no output)"))
# ---------------------------------------------------------------------------
# Tool: hs_test_kill
# ---------------------------------------------------------------------------
def hs_test_kill(args):
    """Kill any running `node tests/hs-run-filtered.js` processes.

    Args:
        signal_name: "TERM" (default) or "KILL". Matching is case-insensitive
            and an optional "SIG" prefix is accepted. Any other value is
            rejected with an error result instead of being silently treated
            as KILL.

    Returns:
        A text result listing the PIDs that were signalled (and any
        failures), or an error result for an unknown signal name.
    """
    requested = args.get("signal_name") or "TERM"
    name = str(requested).strip().upper().removeprefix("SIG")
    signals = {"TERM": signal.SIGTERM, "KILL": signal.SIGKILL}
    if name not in signals:
        return error_result(
            f"Unknown signal_name {requested!r}: expected 'TERM' or 'KILL'"
        )
    sig = signals[name]

    pids = find_runner_pids()
    if not pids:
        return text_result("No runner processes found.")

    killed, failed = [], []
    for pid in pids:
        try:
            os.kill(pid, sig)
        except ProcessLookupError:
            # Exited between the pgrep scan and the kill; nothing to report.
            continue
        except OSError as e:
            failed.append((pid, str(e)))
        else:
            killed.append(pid)

    msg = f"Sent SIG{name} to {len(killed)} runner(s): {killed}"
    if failed:
        msg += f"\nFailed: {failed}"
    return text_result(msg)
# ---------------------------------------------------------------------------
# Tool: hs_test_regen
# ---------------------------------------------------------------------------
def hs_test_regen(args):
    """Regenerate spec/tests/test-hyperscript-behavioral.sx from the upstream JSON.

    Runs the generator script with ``python3`` from the project root (120s
    cap) and reports its exit status plus at most the last 25 lines of its
    combined stdout and stderr.
    """
    if not os.path.isfile(GEN_PATH):
        return error_result(f"Generator not found at {GEN_PATH}")

    try:
        completed = subprocess.run(
            ["python3", GEN_PATH],
            cwd=PROJECT_DIR,
            capture_output=True,
            text=True,
            timeout=120,
        )
    except subprocess.TimeoutExpired:
        return error_result("Generator timed out (>120s)")

    report = (completed.stdout or "") + (completed.stderr or "")
    report_lines = report.splitlines()
    # The generator prints a long category list; only the tail is interesting.
    if len(report_lines) > 25:
        tail = report_lines[-25:]
        report = "...\n" + "\n".join(tail)

    if completed.returncode == 0:
        status = "ok"
    else:
        status = f"exit {completed.returncode}"
    return text_result(f"## hs_test_regen ({status})\n\n{report}")
# ---------------------------------------------------------------------------
# Tool: hs_test_status
# ---------------------------------------------------------------------------
def hs_test_status(args):
    """Report the runner and generator paths and the PIDs of any live runner."""
    pids = find_runner_pids()
    running = f"running pids: {pids}" if pids else "running pids: (none)"
    report = [
        f"runner: {RUNNER_PATH}",
        f"generator: {GEN_PATH}",
        running,
    ]
    return text_result("\n".join(report))
# ---------------------------------------------------------------------------
# Shared helper: run hs-kernel-eval.js
# ---------------------------------------------------------------------------
# A duration such as "123ms" or "4.5 ms". Used to drop load-timing noise from
# stderr without discarding every line that merely contains the letters "ms"
# (e.g. "params", "symbols", "items").
_KERNEL_TIMING_RE = re.compile(r"\b\d+(?:\.\d+)?\s*ms\b")


def _format_kernel_stdout(stdout, returncode):
    """Render the eval script's stdout as "Result: ..." / "Error: ..." text.

    The script is expected to print one JSON object with an ``ok`` flag and
    either ``result`` or ``error``. Anything else is passed through verbatim,
    with the exit code appended when it is non-zero.
    """
    try:
        data = json.loads(stdout)
    except ValueError:
        data = None
    if not isinstance(data, dict):
        out = stdout or "(no output)"
        if returncode:
            out += f"\n\n(exit code {returncode})"
        return out

    if not data.get("ok"):
        return f"Error: {data.get('error', 'unknown error')}"

    result = data.get("result", "nil")
    # The result may itself be a JSON-encoded string; unwrap it when it is.
    try:
        result = json.loads(result)
    except (TypeError, ValueError):
        pass
    return f"Result: {result}"


def _filter_kernel_stderr(stderr):
    """Return stderr lines with load-progress and timing lines removed."""
    kept = []
    for line in stderr.splitlines():
        if line.startswith(("Loading", "Modules")):
            continue
        if _KERNEL_TIMING_RE.search(line):
            continue
        kept.append(line)
    return kept


def _kernel_eval(mode, expr, setup=None, files=None, timeout_secs=60):
    """Run hs-kernel-eval.js and return a text_result.

    Args:
        mode: value passed to the script as HS_EVAL_MODE.
        expr: value passed as HS_EVAL_EXPR.
        setup: optional value passed as HS_EVAL_SETUP.
        files: optional file name or list of file names, passed
            comma-joined as HS_EVAL_FILES.
        timeout_secs: requested wall-clock cap; the subprocess timeout is
            clamped to 10..300 seconds.

    Returns:
        A text result with the formatted outcome and any non-noise stderr
        lines, or an error result when the script is missing or the
        subprocess times out.
    """
    if not os.path.isfile(EVAL_PATH):
        return error_result(f"Eval script not found at {EVAL_PATH}")

    requested = int(timeout_secs)
    timeout = max(10, min(requested, 300))
    # Script-side timeout: at least 5s, but never longer than the subprocess
    # cap, so an over-limit request cannot ask the script to outlive the cap.
    inner_ms = min(max(5000, requested * 1000), timeout * 1000)

    env = os.environ.copy()
    env["HS_EVAL_MODE"] = mode
    env["HS_EVAL_EXPR"] = expr
    env["HS_EVAL_TIMEOUT_MS"] = str(inner_ms)
    if setup:
        env["HS_EVAL_SETUP"] = str(setup)
    if files:
        if isinstance(files, str):
            # A bare string would otherwise be joined character by character.
            files = [files]
        env["HS_EVAL_FILES"] = ",".join(str(f) for f in files)

    try:
        completed = subprocess.run(
            ["node", EVAL_PATH],
            cwd=PROJECT_DIR, env=env,
            capture_output=True, text=True, timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        return error_result(f"Kernel eval timed out after {timeout}s")

    stdout = (completed.stdout or "").strip()
    stderr = (completed.stderr or "").strip()

    out = _format_kernel_stdout(stdout, completed.returncode)
    err_lines = _filter_kernel_stderr(stderr)
    if err_lines:
        out += "\n\nstderr:\n" + "\n".join(err_lines)
    return text_result(out)
# ---------------------------------------------------------------------------
# Tool: sx_kernel_eval
# ---------------------------------------------------------------------------
def sx_kernel_eval(args):
    """Evaluate a SX expression in the full WASM kernel with HS modules loaded.

    The kernel includes mock DOM, so HS runtime functions (hs-repeat-forever,
    hs-compile, dom-dispatch, etc.) are available. Use this when
    sx_harness_eval fails due to missing host primitives (host-new, host-get,
    etc.).

    Args:
        expr: SX expression to evaluate (required).
        setup: SX setup expression run before main eval (optional).
        files: List of .sx files to load before eval (optional).
        timeout_secs: Wall-clock cap in seconds (default 60, max 300).
    """
    expression = args.get("expr", "").strip()
    if expression:
        return _kernel_eval(
            "eval",
            expression,
            setup=args.get("setup"),
            files=args.get("files"),
            timeout_secs=int(args.get("timeout_secs", 60)),
        )
    return error_result("'expr' is required")
# ---------------------------------------------------------------------------
# Tool: hs_compile_inspect
# ---------------------------------------------------------------------------
def hs_compile_inspect(args):
    """Compile an HS source string and return the generated SX AST.

    Delegates to the kernel eval script in "compile" mode. Useful for
    debugging what AST the HS compiler produces for a given snippet.

    Args:
        hs_source: HS source code to compile (required).
        timeout_secs: Wall-clock cap in seconds (default 30).
    """
    source = args.get("hs_source", "").strip()
    if source:
        cap = int(args.get("timeout_secs", 30))
        return _kernel_eval(mode="compile", expr=source, timeout_secs=cap)
    return error_result("'hs_source' is required")
# ---------------------------------------------------------------------------
# Tool: hs_parse_inspect
# ---------------------------------------------------------------------------
def hs_parse_inspect(args):
    """Parse an HS source string and return the raw parser AST (before compilation).

    Delegates to the kernel eval script in "parse" mode. Useful for debugging
    tokenizer/parser output before the compiler sees it.

    Args:
        hs_source: HS source code to parse (required).
        timeout_secs: Wall-clock cap in seconds (default 30).
    """
    source = args.get("hs_source", "").strip()
    if source:
        cap = int(args.get("timeout_secs", 30))
        return _kernel_eval(mode="parse", expr=source, timeout_secs=cap)
    return error_result("'hs_source' is required")
# ---------------------------------------------------------------------------
# JSON-RPC dispatch
# ---------------------------------------------------------------------------
def tool(name, description, properties, required):
    """Build one tool descriptor: name, description and an object input schema."""
    schema = {"type": "object", "properties": properties, "required": required}
    return {"name": name, "description": description, "inputSchema": schema}
# Tool descriptors returned verbatim for "tools/list"; the names here must
# match the cases handled by handle_tool().
TOOLS = [
    tool(
        name="hs_test_run",
        description=(
            "Run the hyperscript conformance suite (node tests/hs-run-filtered.js) with a "
            "wall-clock timeout. Optionally filter by suite or test index range. Always "
            "kills the child process group on timeout."
        ),
        properties={
            "suite": {
                "type": "string",
                "description": "HS_SUITE filter (e.g. 'hs-upstream-put')",
            },
            "start": {
                "type": "integer",
                "description": "First test index (HS_START)",
            },
            "end": {
                "type": "integer",
                "description": "Exclusive end test index (HS_END)",
            },
            "step_limit": {
                "type": "integer",
                "description": "HS_STEP_LIMIT (default 200000)",
            },
            "timeout_secs": {
                "type": "integer",
                "description": "Wall-clock cap (default 300, max 1800)",
            },
            "summary_only": {
                "type": "boolean",
                "description": "Trim per-test output (default true)",
            },
            "verbose": {
                "type": "boolean",
                "description": "Set HS_VERBOSE=1",
            },
        },
        required=[],
    ),
    tool(
        name="hs_test_kill",
        description="Kill any background hs-run-filtered.js processes.",
        properties={
            "signal_name": {
                "type": "string",
                "description": "TERM (default) or KILL",
            },
        },
        required=[],
    ),
    tool(
        name="hs_test_regen",
        description="Regenerate spec/tests/test-hyperscript-behavioral.sx from the upstream JSON.",
        properties={},
        required=[],
    ),
    tool(
        name="hs_test_status",
        description="Show whether any runner is currently in flight.",
        properties={},
        required=[],
    ),
    tool(
        name="sx_kernel_eval",
        description=(
            "Evaluate a SX expression in the full WASM kernel with HS modules and mock DOM loaded. "
            "Use when sx_harness_eval fails due to missing host primitives (host-new, host-get, etc.). "
            "Has access to hs-compile, hs-parse, hs-repeat-forever, dom-dispatch, etc."
        ),
        properties={
            "expr": {
                "type": "string",
                "description": "SX expression to evaluate",
            },
            "setup": {
                "type": "string",
                "description": "SX setup expression run before eval (optional)",
            },
            "files": {
                "type": "array",
                "items": {"type": "string"},
                "description": "Extra .sx files to load before eval (optional)",
            },
            "timeout_secs": {
                "type": "integer",
                "description": "Wall-clock cap in seconds (default 60, max 300)",
            },
        },
        required=["expr"],
    ),
    tool(
        name="hs_compile_inspect",
        description=(
            "Compile an HS source snippet and return the generated SX AST string. "
            "Runs hs-compile and returns (str result). Use to debug what AST the compiler produces."
        ),
        properties={
            "hs_source": {
                "type": "string",
                "description": "HS source code to compile",
            },
            "timeout_secs": {
                "type": "integer",
                "description": "Wall-clock cap in seconds (default 30)",
            },
        },
        required=["hs_source"],
    ),
    tool(
        name="hs_parse_inspect",
        description=(
            "Parse an HS source snippet and return the raw parser AST (before compilation). "
            "Runs hs-parse and returns (str result). Use to debug tokenizer/parser output."
        ),
        properties={
            "hs_source": {
                "type": "string",
                "description": "HS source code to parse",
            },
            "timeout_secs": {
                "type": "integer",
                "description": "Wall-clock cap in seconds (default 30)",
            },
        },
        required=["hs_source"],
    ),
]
def handle_tool(name, args):
    """Call the tool function registered under *name* with *args*.

    Returns that function's result, or an error result when *name* is not a
    known tool.
    """
    handlers = {
        "hs_test_run": hs_test_run,
        "hs_test_kill": hs_test_kill,
        "hs_test_regen": hs_test_regen,
        "hs_test_status": hs_test_status,
        "sx_kernel_eval": sx_kernel_eval,
        "hs_compile_inspect": hs_compile_inspect,
        "hs_parse_inspect": hs_parse_inspect,
    }
    # Only strings can name a tool; the guard also keeps unhashable values
    # (lists, dicts) out of the dict lookup.
    handler = handlers.get(name) if isinstance(name, str) else None
    if handler is None:
        return error_result(f"Unknown tool: {name}")
    return handler(args)
def dispatch(method, params):
match method:
case "initialize":
return {
"protocolVersion": "2024-11-05",
"capabilities": {"tools": {}},
"serverInfo": {"name": "hs-test", "version": "0.1.0"},
}
case "notifications/initialized":
return None
case "tools/list":
return {"tools": TOOLS}
case "tools/call":
name = params["name"]
args = params.get("arguments", {}) or {}
try:
return handle_tool(name, args)
except Exception as e:
return error_result(f"Error: {e}")
case _:
return None
def _write_message(payload):
    """Emit one JSON-RPC message as a single flushed line on stdout."""
    print(json.dumps(payload), flush=True)


def _error_response(msg_id, code, message):
    """Build a JSON-RPC 2.0 error response for request *msg_id*."""
    return {
        "jsonrpc": "2.0",
        "id": msg_id,
        "error": {"code": code, "message": message},
    }


def main():
    """Serve newline-delimited JSON-RPC requests from stdin until EOF.

    Blank lines, lines that are not valid JSON, and JSON values that are not
    objects are skipped. Messages without an "id" never get a reply. A
    request with an "id" gets a "result" reply when dispatch() returns one,
    a -32601 error for an unrecognised method, or a -32603 error when
    dispatch() raises, so a client is never left waiting on a request.
    """
    for line in sys.stdin:
        line = line.strip()
        if not line:
            continue
        try:
            msg = json.loads(line)
        except json.JSONDecodeError:
            continue
        if not isinstance(msg, dict):
            # Batches and bare scalars are not supported; ignore them.
            continue

        method = msg.get("method", "")
        params = msg.get("params") or {}
        msg_id = msg.get("id")

        try:
            result = dispatch(method, params)
        except Exception as e:
            # Keep serving: one bad request must not end the loop.
            if msg_id is not None:
                _write_message(_error_response(msg_id, -32603, f"Internal error: {e}"))
            continue

        if msg_id is None:
            continue
        if result is not None:
            _write_message({"jsonrpc": "2.0", "id": msg_id, "result": result})
            continue
        # No result: stay silent for messages that are not requests (no
        # method, e.g. a stray response object) and for notifications.
        is_request = (
            "method" in msg
            and isinstance(method, str)
            and not method.startswith("notifications/")
        )
        if is_request:
            _write_message(_error_response(msg_id, -32601, f"Method not found: {method}"))


if __name__ == "__main__":
    main()