HS: fix test-456 timeout + add sx_kernel_eval/hs_compile_inspect/hs_parse_inspect tools

- hs-run-filtered.js: add collectionExpressions to _NO_STEP_LIMIT_SUITES (fixes state
  corruption for downstream for-loop tests), add repeat-forever tests to _NO_STEP_LIMIT,
  extend slow deadline for collectionExpressions to 60s
- tests/hs-kernel-eval.js: new standalone Node.js eval script — full WASM kernel +
  mock DOM, accepts HS_EVAL_EXPR/MODE/SETUP/FILES env vars, supports eval/compile/parse modes
- tools/mcp_hs_test.py: add sx_kernel_eval, hs_compile_inspect, hs_parse_inspect tools
- hosts/ocaml/bin/mcp_tree.ml: add host_stubs param to sx_harness_eval (OCaml build pending)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-02 17:45:12 +00:00
parent d25a97d464
commit a3abe47286
4 changed files with 466 additions and 2 deletions

View File

@@ -18,7 +18,8 @@ import time
# Repository root: two directory levels above this file.
PROJECT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# Path to the tests/hs-run-filtered.js runner script.
RUNNER_PATH = os.path.join(PROJECT_DIR, "tests/hs-run-filtered.js")
# Path to the tests/playwright/generate-sx-tests.py generator script.
GEN_PATH = os.path.join(PROJECT_DIR, "tests/playwright/generate-sx-tests.py")
# Path to the standalone Node.js eval script launched by _kernel_eval.
EVAL_PATH = os.path.join(PROJECT_DIR, "tests/hs-kernel-eval.js")
# ---------------------------------------------------------------------------
@@ -218,6 +219,135 @@ def hs_test_status(args):
return text_result("\n".join(info))
# ---------------------------------------------------------------------------
# Shared helper: run hs-kernel-eval.js
# ---------------------------------------------------------------------------
def _kernel_eval(mode, expr, setup=None, files=None, timeout_secs=60):
    """Run hs-kernel-eval.js in a Node subprocess and return a text_result.

    The script is configured entirely through environment variables
    (HS_EVAL_MODE, HS_EVAL_EXPR, HS_EVAL_TIMEOUT_MS and, when supplied,
    HS_EVAL_SETUP / HS_EVAL_FILES). Its stdout is expected to be a JSON
    object with an "ok" flag plus either "result" or "error"; anything else
    is passed through verbatim.

    Args:
        mode: Value for HS_EVAL_MODE. Callers in this file pass "eval",
            "compile" or "parse".
        expr: Source text to hand to the script via HS_EVAL_EXPR.
        setup: Optional setup expression (HS_EVAL_SETUP).
        files: Optional file path, or iterable of file paths, sent as a
            comma-separated HS_EVAL_FILES value.
        timeout_secs: Requested wall-clock cap in seconds. The subprocess
            timeout is clamped to the range 10..300.

    Returns:
        error_result(...) when the script is missing, node cannot be
        launched, or the subprocess times out; otherwise text_result(...)
        holding the formatted result/error plus any retained stderr lines.
    """
    import json
    import re

    if not os.path.isfile(EVAL_PATH):
        return error_result(f"Eval script not found at {EVAL_PATH}")

    requested = int(timeout_secs)
    # Hard wall-clock cap enforced by subprocess.run.
    timeout = max(10, min(requested, 300))
    # The script's own budget keeps its 5s floor but must respect the same
    # 300s ceiling; previously it could be configured far above the point
    # at which the subprocess is killed.
    script_timeout_ms = max(5000, min(requested, 300) * 1000)

    env = os.environ.copy()
    env["HS_EVAL_MODE"] = mode
    env["HS_EVAL_EXPR"] = expr
    env["HS_EVAL_TIMEOUT_MS"] = str(script_timeout_ms)
    if setup:
        env["HS_EVAL_SETUP"] = setup
    if files:
        # A bare string would otherwise be joined character by character.
        if isinstance(files, str):
            files = [files]
        env["HS_EVAL_FILES"] = ",".join(str(f) for f in files)

    try:
        r = subprocess.run(
            ["node", EVAL_PATH],
            cwd=PROJECT_DIR, env=env,
            capture_output=True, text=True, timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        return error_result(f"Kernel eval timed out after {timeout}s")
    except OSError as exc:
        # Typically: the node executable is not on PATH.
        return error_result(f"Could not launch node: {exc}")

    stderr = (r.stderr or "").strip()
    stdout = (r.stdout or "").strip()

    # Parse the JSON envelope from stdout; fall back to raw text otherwise.
    try:
        data = json.loads(stdout)
    except (ValueError, RecursionError):
        data = None

    if isinstance(data, dict):
        if data.get("ok"):
            result = data.get("result", "nil")
            # The result may itself be a JSON-encoded string; unwrap it
            # when possible, otherwise show it unchanged.
            if isinstance(result, str):
                try:
                    result = json.loads(result)
                except (ValueError, RecursionError):
                    pass
            out = f"Result: {result}"
        else:
            out = f"Error: {data.get('error', 'unknown error')}"
    else:
        out = stdout or "(no output)"
        if r.returncode != 0:
            # Without a structured envelope the exit code is the only
            # reliable failure signal, so surface it.
            out += f"\n\n(node exited with code {r.returncode})"

    if stderr:
        # Filter noisy load-progress lines, keep errors. Timing lines are
        # recognised by a "<number>ms" token rather than a bare "ms"
        # substring, which also matched words such as "arguments" or
        # "forms" and hid genuine error messages.
        # NOTE(review): assumes progress lines report durations as e.g.
        # "123ms" / "1.5 ms" -- confirm against hs-kernel-eval.js.
        timing = re.compile(r"\d+(?:\.\d+)?\s*ms\b")
        err_lines = [
            line for line in stderr.splitlines()
            if not line.startswith(("Loading", "Modules"))
            and not timing.search(line)
        ]
        if err_lines:
            out += "\n\nstderr:\n" + "\n".join(err_lines)

    return text_result(out)
# ---------------------------------------------------------------------------
# Tool: sx_kernel_eval
# ---------------------------------------------------------------------------
def sx_kernel_eval(args):
    """Evaluate an SX expression in the full WASM kernel with HS modules loaded.

    The kernel includes mock DOM, so HS runtime functions (hs-repeat-forever,
    hs-compile, dom-dispatch, etc.) are available. Use this when
    sx_harness_eval fails due to missing host primitives (host-new,
    host-get, etc.).

    Args:
        args: Tool-argument mapping with the keys:
            expr: SX expression to evaluate (required, non-blank string).
            setup: SX setup expression run before main eval (optional).
            files: List of .sx files to load before eval (optional).
            timeout_secs: Wall-clock cap in seconds (default 60, max 300).

    Returns:
        An error_result when 'expr' is missing/blank/not a string or when
        'timeout_secs' is not an integer; otherwise whatever _kernel_eval
        returns for mode "eval".
    """
    # Guard against an explicit null or a non-string value, which would
    # otherwise raise on .strip() instead of producing a tool error.
    expr = args.get("expr")
    if not isinstance(expr, str) or not expr.strip():
        return error_result("'expr' is required")

    raw_timeout = args.get("timeout_secs")
    try:
        timeout_secs = 60 if raw_timeout is None else int(raw_timeout)
    except (TypeError, ValueError):
        return error_result("'timeout_secs' must be an integer")

    return _kernel_eval(
        mode="eval",
        expr=expr.strip(),
        setup=args.get("setup"),
        files=args.get("files"),
        timeout_secs=timeout_secs,
    )
# ---------------------------------------------------------------------------
# Tool: hs_compile_inspect
# ---------------------------------------------------------------------------
def hs_compile_inspect(args):
    """Compile an HS source string and return the generated SX AST.

    Runs hs-compile on the source and returns its string representation.
    Useful for debugging what AST the HS compiler produces for a given
    snippet.

    Args:
        args: Tool-argument mapping with the keys:
            hs_source: HS source code to compile (required, non-blank string).
            timeout_secs: Wall-clock cap in seconds (default 30).

    Returns:
        An error_result when 'hs_source' is missing/blank/not a string or
        when 'timeout_secs' is not an integer; otherwise whatever
        _kernel_eval returns for mode "compile".
    """
    # Guard against an explicit null or a non-string value, which would
    # otherwise raise on .strip() instead of producing a tool error.
    src = args.get("hs_source")
    if not isinstance(src, str) or not src.strip():
        return error_result("'hs_source' is required")

    raw_timeout = args.get("timeout_secs")
    try:
        timeout_secs = 30 if raw_timeout is None else int(raw_timeout)
    except (TypeError, ValueError):
        return error_result("'timeout_secs' must be an integer")

    return _kernel_eval(
        mode="compile",
        expr=src.strip(),
        timeout_secs=timeout_secs,
    )
# ---------------------------------------------------------------------------
# Tool: hs_parse_inspect
# ---------------------------------------------------------------------------
def hs_parse_inspect(args):
    """Parse an HS source string and return the raw parser AST (before compilation).

    Runs hs-parse on the source and returns its string representation.
    Useful for debugging tokenizer/parser output before the compiler sees it.

    Args:
        args: Tool-argument mapping with the keys:
            hs_source: HS source code to parse (required, non-blank string).
            timeout_secs: Wall-clock cap in seconds (default 30).

    Returns:
        An error_result when 'hs_source' is missing/blank/not a string or
        when 'timeout_secs' is not an integer; otherwise whatever
        _kernel_eval returns for mode "parse".
    """
    # Guard against an explicit null or a non-string value, which would
    # otherwise raise on .strip() instead of producing a tool error.
    src = args.get("hs_source")
    if not isinstance(src, str) or not src.strip():
        return error_result("'hs_source' is required")

    raw_timeout = args.get("timeout_secs")
    try:
        timeout_secs = 30 if raw_timeout is None else int(raw_timeout)
    except (TypeError, ValueError):
        return error_result("'timeout_secs' must be an integer")

    return _kernel_eval(
        mode="parse",
        expr=src.strip(),
        timeout_secs=timeout_secs,
    )
# ---------------------------------------------------------------------------
# JSON-RPC dispatch
# ---------------------------------------------------------------------------
@@ -265,6 +395,40 @@ TOOLS = [
{},
[],
),
tool(
"sx_kernel_eval",
"Evaluate a SX expression in the full WASM kernel with HS modules and mock DOM loaded. "
"Use when sx_harness_eval fails due to missing host primitives (host-new, host-get, etc.). "
"Has access to hs-compile, hs-parse, hs-repeat-forever, dom-dispatch, etc.",
{
"expr": {"type": "string", "description": "SX expression to evaluate"},
"setup": {"type": "string", "description": "SX setup expression run before eval (optional)"},
"files": {"type": "array", "items": {"type": "string"},
"description": "Extra .sx files to load before eval (optional)"},
"timeout_secs": {"type": "integer", "description": "Wall-clock cap in seconds (default 60, max 300)"},
},
["expr"],
),
tool(
"hs_compile_inspect",
"Compile an HS source snippet and return the generated SX AST string. "
"Runs hs-compile and returns (str result). Use to debug what AST the compiler produces.",
{
"hs_source": {"type": "string", "description": "HS source code to compile"},
"timeout_secs": {"type": "integer", "description": "Wall-clock cap in seconds (default 30)"},
},
["hs_source"],
),
tool(
"hs_parse_inspect",
"Parse an HS source snippet and return the raw parser AST (before compilation). "
"Runs hs-parse and returns (str result). Use to debug tokenizer/parser output.",
{
"hs_source": {"type": "string", "description": "HS source code to parse"},
"timeout_secs": {"type": "integer", "description": "Wall-clock cap in seconds (default 30)"},
},
["hs_source"],
),
]
@@ -278,6 +442,12 @@ def handle_tool(name, args):
return hs_test_regen(args)
case "hs_test_status":
return hs_test_status(args)
case "sx_kernel_eval":
return sx_kernel_eval(args)
case "hs_compile_inspect":
return hs_compile_inspect(args)
case "hs_parse_inspect":
return hs_parse_inspect(args)
case _:
return error_result(f"Unknown tool: {name}")