lua: conformance.sh + Python runner (writes scoreboard.{json,md})

2026-04-24 17:37:09 +00:00
parent d925be4768
commit 3ab8474e78
3 changed files with 363 additions and 1 deletions
--- a/lib/lua/conformance.py
+++ b/lib/lua/conformance.py
@@ -0,0 +1,348 @@
+#!/usr/bin/env python3
+"""lua-conformance — run the PUC-Rio Lua 5.1 test suite against Lua-on-SX.
+
+Walks lib/lua/lua-tests/*.lua, evaluates each via `lua-eval-ast` on a
+long-lived sx_server.exe subprocess, classifies pass/fail/timeout per file,
+and writes lib/lua/scoreboard.{json,md}.
+
+Modelled on lib/js/test262-runner.py but much simpler: each Lua test file is
+its own unit (they're self-contained assertion scripts; they pass if they
+complete without raising). No harness stub, no frontmatter, no worker pool.
+
+Usage:
+    python3 lib/lua/conformance.py
+    python3 lib/lua/conformance.py --filter locals
+    python3 lib/lua/conformance.py --per-test-timeout 3 -v
+"""
+from __future__ import annotations
+
+import argparse
+import json
+import os
+import re
+import select
+import subprocess
+import sys
+import time
+from collections import Counter
+from pathlib import Path
+
+# Repository root: this file lives at <root>/lib/lua/conformance.py, so the
+# root is the third ancestor of the resolved file path.
+REPO = Path(__file__).resolve().parents[2]
+# Preferred server binary (in-repo build) and the fixed path used by
+# pick_sx_server() when the in-repo build is absent.
+SX_SERVER_PRIMARY = REPO / "hosts" / "ocaml" / "_build" / "default" / "bin" / "sx_server.exe"
+SX_SERVER_FALLBACK = Path("/root/rose-ash/hosts/ocaml/_build/default/bin/sx_server.exe")
+# Directory scanned for *.lua test files.
+TESTS_DIR = REPO / "lib" / "lua" / "lua-tests"
+
+# Default for --per-test-timeout, in seconds.
+DEFAULT_TIMEOUT = 8.0
+
+# Files that require facilities we don't (and won't soon) support.
+# Still classified as skip rather than fail so the scoreboard stays honest.
+# Maps test filename -> human-readable skip reason shown in the scoreboard.
+HARDCODED_SKIP = {
+    "all.lua":        "driver uses dofile to chain other tests",
+    "api.lua":        "requires testC (C debug library)",
+    "checktable.lua": "internal debug helpers",
+    "code.lua":       "bytecode inspection via debug library",
+    "db.lua":         "debug library",
+    "files.lua":      "io library",
+    "gc.lua":         "collectgarbage / finalisers",
+    "main.lua":       "standalone interpreter driver",
+}
+
+# Reply lines parsed by Session._run; group 1 is always the epoch number.
+#   (ok <epoch> <value>)      -> value inline, captured as group 2
+#   (ok-len <epoch> <n>)      -> value follows on the next line
+#   (error <epoch> <message>) -> message captured as group 2
+RX_OK_INLINE = re.compile(r"^\(ok (\d+) (.*)\)\s*$")
+RX_OK_LEN = re.compile(r"^\(ok-len (\d+) \d+\)\s*$")
+RX_ERR = re.compile(r"^\(error (\d+) (.*)\)\s*$")
+
+
+def pick_sx_server() -> Path:
+    """Return the sx_server binary to launch.
+
+    The in-repo build is chosen when it exists on disk; otherwise the fixed
+    fallback path is returned without checking that it exists (the caller
+    is expected to verify it).
+    """
+    return SX_SERVER_PRIMARY if SX_SERVER_PRIMARY.exists() else SX_SERVER_FALLBACK
+
+
+def sx_escape_nested(s: str) -> str:
+    """Escape *s* for embedding two string literals deep.
+
+    The result is spliced into (eval "(lua-eval-ast \"<src>\")"): the first
+    pass escapes for the inner literal read by `lua-eval-ast`, the second
+    pass escapes that result again for the outer literal read by `eval`.
+    """
+    # Inner literal: backslash, quote and the three control characters.
+    inner_table = str.maketrans({
+        "\\": "\\\\",
+        '"': '\\"',
+        "\n": "\\n",
+        "\r": "\\r",
+        "\t": "\\t",
+    })
+    # Outer literal: only backslash and quote can remain after the first pass.
+    outer_table = str.maketrans({"\\": "\\\\", '"': '\\"'})
+    return s.translate(inner_table).translate(outer_table)
+
+
+def classify_error(msg: str) -> str:
+    """Bucket an sx_server error message into a short failure-mode label.
+
+    Args:
+        msg: the raw error payload returned by the server.
+
+    Returns:
+        "undefined symbol: <name>" when a symbol name can be extracted,
+        otherwise the label of the first matching marker below, otherwise
+        "other: " followed by the first 80 characters of the trimmed message.
+    """
+    lowered = msg.lower()
+
+    # The symbol may arrive bare, quoted, or with escaped quotes (\"name\").
+    # Excluding the backslash from the capture keeps the escape that precedes
+    # the closing quote out of the reported name.
+    sym = re.search(r"undefined symbol:\s*\\?\"?([^\"\\\s)]+)", msg, re.I)
+    if sym:
+        return f"undefined symbol: {sym.group(1)}"
+
+    # Checked in order; the first substring found in the lowered message wins.
+    markers = (
+        ("undefined symbol", "undefined symbol"),
+        ("lua: arith", "arith type error"),
+        ("lua-transpile", "transpile: unsupported node"),
+        ("lua-parse", "parse error"),
+        ("lua-tokenize", "tokenize error"),
+        ("unknown node", "unknown AST node"),
+        ("not yet supported", "not yet supported"),
+        ("nth:", "nth index error"),
+        ("timeout", "timeout"),
+    )
+    for needle, label in markers:
+        if needle in lowered:
+            return label
+
+    # Strip SX-side wrapping and trim
+    trimmed = msg.strip('"').strip()
+    return f"other: {trimmed[:80]}"
+
+
+class Session:
+    """A long-lived sx_server subprocess driven over stdin/stdout.
+
+    Each command is sent as an `(epoch N)` line followed by the expression;
+    the reply is recognised by matching RX_OK_INLINE / RX_OK_LEN / RX_ERR
+    against stdout lines carrying the same epoch number.
+    """
+
+    def __init__(self, sx_server: Path, timeout: float):
+        self.sx_server = sx_server
+        self.timeout = timeout  # seconds allowed per run_lua() call
+        self.proc: subprocess.Popen | None = None
+        self._buf = b""  # bytes read from stdout not yet returned as a line
+        self._fd = -1  # stdout file descriptor of the running server
+
+    def start(self) -> None:
+        """Launch the server, wait for "(ready)" and load the Lua runtime.
+
+        Raises:
+            TimeoutError: the server did not answer in time.
+            RuntimeError: the server closed a pipe during startup.
+        """
+        self.proc = subprocess.Popen(
+            [str(self.sx_server)],
+            stdin=subprocess.PIPE,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.DEVNULL,
+            cwd=str(REPO),
+            bufsize=0,
+        )
+        self._fd = self.proc.stdout.fileno()
+        self._buf = b""
+        # Non-blocking so _readline can enforce its own deadline via select().
+        os.set_blocking(self._fd, False)
+        self._wait_for("(ready)", timeout=15.0)
+        modules = ("tokenizer", "parser", "runtime", "transpile")
+        for epoch, module in enumerate(modules, start=1):
+            kind, payload = self._run(epoch, f'(load "lib/lua/{module}.sx")', 60)
+            if kind != "ok":
+                # Keep going (tests will still be classified), but make the
+                # root cause visible instead of silently dropping it.
+                print(f"WARNING: loading lib/lua/{module}.sx failed: {payload}",
+                      file=sys.stderr)
+
+    def stop(self) -> None:
+        """Shut the server down and release its pipes; safe to call twice."""
+        proc = self.proc
+        if proc is None:
+            return
+        self.proc = None
+        try:
+            if proc.stdin is not None:
+                proc.stdin.close()
+        except OSError:
+            pass
+        try:
+            proc.terminate()
+            proc.wait(timeout=3)
+        except (OSError, subprocess.TimeoutExpired):
+            try:
+                proc.kill()
+                # Reap the child so it does not linger as a zombie.
+                proc.wait(timeout=3)
+            except (OSError, subprocess.TimeoutExpired):
+                pass
+        finally:
+            # The stdout pipe is ours to close; leaving it open leaks one
+            # descriptor per restart.
+            try:
+                if proc.stdout is not None:
+                    proc.stdout.close()
+            except OSError:
+                pass
+            self._fd = -1
+            self._buf = b""
+
+    def _readline(self, timeout: float) -> str | None:
+        """Return the next stdout line (newline included), decoded as UTF-8.
+
+        Returns None when the stream is closed or unreadable; at end of
+        stream any buffered partial line is returned first.
+
+        Raises:
+            TimeoutError: no complete line arrived within *timeout* seconds.
+        """
+        deadline = time.monotonic() + timeout
+        while True:
+            nl = self._buf.find(b"\n")
+            if nl >= 0:
+                line = self._buf[: nl + 1]
+                self._buf = self._buf[nl + 1 :]
+                return line.decode("utf-8", errors="replace")
+            remaining = deadline - time.monotonic()
+            if remaining <= 0:
+                raise TimeoutError("readline timeout")
+            try:
+                rlist, _, _ = select.select([self._fd], [], [], remaining)
+            except (OSError, ValueError):
+                # Descriptor closed or invalid: treat as end of stream.
+                return None
+            if not rlist:
+                raise TimeoutError("readline timeout")
+            try:
+                chunk = os.read(self._fd, 65536)
+            except (BlockingIOError, InterruptedError):
+                continue
+            except OSError:
+                return None
+            if not chunk:
+                # EOF: hand back whatever is buffered, then signal closure.
+                if self._buf:
+                    rv = self._buf.decode("utf-8", errors="replace")
+                    self._buf = b""
+                    return rv
+                return None
+            self._buf += chunk
+
+    def _wait_for(self, token: str, timeout: float) -> None:
+        """Consume stdout lines until one contains *token*.
+
+        Raises:
+            TimeoutError: *token* was not seen within *timeout* seconds.
+            RuntimeError: stdout closed before *token* appeared.
+        """
+        start = time.monotonic()
+        while time.monotonic() - start < timeout:
+            line = self._readline(timeout - (time.monotonic() - start))
+            if line is None:
+                raise RuntimeError("sx_server closed stdout before ready")
+            if token in line:
+                return
+        raise TimeoutError(f"timeout waiting for {token}")
+
+    def _run(self, epoch: int, cmd: str, timeout: float):
+        """Send *cmd* under *epoch* and return ("ok" | "error", payload).
+
+        Lines that do not match a reply pattern for this epoch are skipped.
+
+        Raises:
+            TimeoutError: no reply for *epoch* within *timeout* seconds.
+            RuntimeError: the server's stdin or stdout was closed.
+        """
+        payload = f"(epoch {epoch})\n{cmd}\n".encode("utf-8")
+        try:
+            self.proc.stdin.write(payload)
+            self.proc.stdin.flush()
+        except (BrokenPipeError, OSError) as exc:
+            raise RuntimeError("sx_server stdin closed") from exc
+        deadline = time.monotonic() + timeout
+        while time.monotonic() < deadline:
+            remaining = deadline - time.monotonic()
+            if remaining <= 0:
+                raise TimeoutError(f"epoch {epoch} timeout")
+            line = self._readline(remaining)
+            if line is None:
+                raise RuntimeError("sx_server closed stdout mid-epoch")
+            m = RX_OK_INLINE.match(line)
+            if m and int(m.group(1)) == epoch:
+                return "ok", m.group(2)
+            m = RX_OK_LEN.match(line)
+            if m and int(m.group(1)) == epoch:
+                # Length-prefixed reply: the value is on the following line.
+                val = self._readline(deadline - time.monotonic()) or ""
+                return "ok", val.rstrip("\n")
+            m = RX_ERR.match(line)
+            if m and int(m.group(1)) == epoch:
+                return "error", m.group(2)
+        raise TimeoutError(f"epoch {epoch} timeout")
+
+    def run_lua(self, epoch: int, src: str):
+        """Evaluate Lua source *src* via `lua-eval-ast`; see _run for results."""
+        escaped = sx_escape_nested(src)
+        cmd = f'(eval "(lua-eval-ast \\"{escaped}\\")")'
+        return self._run(epoch, cmd, self.timeout)
+
+
+def main() -> int:
+    """Run every selected Lua test file and report the results.
+
+    Prints a summary to stdout and, unless --no-scoreboard is given, writes
+    lib/lua/scoreboard.json and lib/lua/scoreboard.md.
+
+    Returns:
+        0 when no test failed or timed out, 1 otherwise (including when the
+        server binary, the tests directory or any matching test is missing).
+    """
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--per-test-timeout", type=float, default=DEFAULT_TIMEOUT)
+    ap.add_argument("--filter", type=str, default=None,
+                    help="only run tests whose filename contains this substring")
+    ap.add_argument("-v", "--verbose", action="store_true")
+    ap.add_argument("--no-scoreboard", action="store_true",
+                    help="do not write scoreboard.{json,md}")
+    args = ap.parse_args()
+
+    sx_server = pick_sx_server()
+    if not sx_server.exists():
+        print(f"ERROR: sx_server not found at {sx_server}", file=sys.stderr)
+        return 1
+    if not TESTS_DIR.exists():
+        print(f"ERROR: no tests dir at {TESTS_DIR}", file=sys.stderr)
+        return 1
+
+    tests = sorted(TESTS_DIR.glob("*.lua"))
+    if args.filter:
+        tests = [p for p in tests if args.filter in p.name]
+    if not tests:
+        print("No tests matched.", file=sys.stderr)
+        return 1
+
+    print(f"Running {len(tests)} Lua test file(s)…", file=sys.stderr)
+    session = Session(sx_server, args.per_test_timeout)
+
+    results = []
+    failure_modes: Counter = Counter()
+
+    try:
+        # Started inside the try so a failed startup still reaches stop().
+        session.start()
+        for i, path in enumerate(tests, start=1):
+            name = path.name
+            skip_reason = HARDCODED_SKIP.get(name)
+            if skip_reason:
+                results.append({"name": name, "status": "skip", "reason": skip_reason, "ms": 0})
+                if args.verbose:
+                    print(f"  - {name}: SKIP ({skip_reason})")
+                continue
+
+            try:
+                src = path.read_text(encoding="utf-8")
+            except UnicodeDecodeError:
+                # Some vendored files are not valid UTF-8; latin-1 never fails.
+                src = path.read_text(encoding="latin-1")
+            t0 = time.monotonic()
+            try:
+                kind, payload = session.run_lua(100 + i, src)
+            except TimeoutError:
+                ms = int((time.monotonic() - t0) * 1000)
+                failure_modes["timeout"] += 1
+                results.append({"name": name, "status": "timeout", "reason": "per-test timeout",
+                                "ms": ms})
+                if args.verbose:
+                    print(f"  - {name}: TIMEOUT ({ms}ms)")
+                # Restart after a timeout to shed any stuck state.
+                session.stop()
+                session.start()
+                continue
+            except RuntimeError as exc:
+                # The server died mid-test: record it against this file and
+                # carry on with a fresh server rather than losing the run.
+                ms = int((time.monotonic() - t0) * 1000)
+                reason = f"crash: {exc}"
+                failure_modes[reason] += 1
+                results.append({"name": name, "status": "fail", "reason": reason, "ms": ms})
+                if args.verbose:
+                    print(f"  - {name}: FAIL — {reason}")
+                session.stop()
+                session.start()
+                continue
+
+            ms = int((time.monotonic() - t0) * 1000)
+            if kind == "ok":
+                results.append({"name": name, "status": "pass", "reason": "", "ms": ms})
+                if args.verbose:
+                    print(f"  + {name}: PASS ({ms}ms)")
+            else:
+                reason = classify_error(payload)
+                failure_modes[reason] += 1
+                results.append({"name": name, "status": "fail", "reason": reason, "ms": ms})
+                if args.verbose:
+                    print(f"  - {name}: FAIL — {reason}")
+    finally:
+        session.stop()
+
+    status_counts = Counter(r["status"] for r in results)
+    n_pass = status_counts["pass"]
+    n_fail = status_counts["fail"]
+    n_timeout = status_counts["timeout"]
+    n_skip = status_counts["skip"]
+    n_total = len(results)
+    n_runnable = n_total - n_skip
+    pct = (n_pass / n_runnable * 100.0) if n_runnable else 0.0
+
+    print()
+    print(f"Lua-on-SX conformance: {n_pass}/{n_runnable} runnable pass ({pct:.1f}%)  "
+          f"fail={n_fail} timeout={n_timeout} skip={n_skip} total={n_total}")
+    if failure_modes:
+        print("Top failure modes:")
+        for mode, count in failure_modes.most_common(10):
+            print(f"  {count}x  {mode}")
+
+    if not args.no_scoreboard:
+        sb = {
+            "totals": {
+                "pass": n_pass, "fail": n_fail, "timeout": n_timeout,
+                "skip": n_skip, "total": n_total, "runnable": n_runnable,
+                "pass_rate": round(pct, 1),
+            },
+            "top_failure_modes": failure_modes.most_common(20),
+            "results": results,
+        }
+        (REPO / "lib" / "lua" / "scoreboard.json").write_text(
+            json.dumps(sb, indent=2), encoding="utf-8"
+        )
+        md = [
+            "# Lua-on-SX conformance scoreboard",
+            "",
+            f"**Pass rate:** {n_pass}/{n_runnable} runnable ({pct:.1f}%)",
+            f"fail={n_fail} timeout={n_timeout} skip={n_skip} total={n_total}",
+            "",
+            "## Top failure modes",
+            "",
+        ]
+        for mode, count in failure_modes.most_common(10):
+            md.append(f"- **{count}x** {mode}")
+        md.extend(["", "## Per-test results", "",
+                   "| Test | Status | Reason | ms |",
+                   "|---|---|---|---:|"])
+        for r in results:
+            # A raw "|" in an error message would split the table cell.
+            reason = (r["reason"] or "-").replace("|", "\\|")
+            md.append(f"| {r['name']} | {r['status']} | {reason} | {r['ms']} |")
+        (REPO / "lib" / "lua" / "scoreboard.md").write_text(
+            "\n".join(md) + "\n", encoding="utf-8"
+        )
+
+    return 0 if (n_fail == 0 and n_timeout == 0) else 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())
--- a/lib/lua/conformance.sh
+++ b/lib/lua/conformance.sh
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+# Lua-on-SX conformance runner — walks lib/lua/lua-tests/*.lua, runs each via
+# `lua-eval-ast` on a long-lived sx_server.exe subprocess, classifies
+# pass/fail/timeout, and writes lib/lua/scoreboard.{json,md}.
+#
+# Usage:
+#   bash lib/lua/conformance.sh               # full suite
+#   bash lib/lua/conformance.sh --filter sort # filter by filename substring
+#   bash lib/lua/conformance.sh -v            # per-file verbose
+
+# -e plus a separate assignment: if the repo root cannot be determined the
+# script aborts here instead of running the runner from the wrong directory.
+set -euo pipefail
+repo_root="$(git rev-parse --show-toplevel)"
+cd "$repo_root"
+exec python3 lib/lua/conformance.py "$@"
--- a/plans/lua-on-sx.md
+++ b/plans/lua-on-sx.md
@@ -56,7 +56,7 @@ Each item: implement → tests → tick box → update progress log.
 - [x] Table constructors (array + hash + computed keys)
 - [x] Raw table access `t.k` / `t[k]` (no metatables yet)
 - [x] Vendor PUC-Rio 5.1.5 suite to `lib/lua/lua-tests/` (just `.lua` files)
- [ ] `lib/lua/conformance.sh` + Python runner (model on `lib/js/test262-runner.py`)
+- [x] `lib/lua/conformance.sh` + Python runner (model on `lib/js/test262-runner.py`)
 - [ ] `scoreboard.json` + `scoreboard.md` baseline

 ### Phase 4 — metatables + error handling (next run)
@@ -82,6 +82,7 @@ Each item: implement → tests → tick box → update progress log.

 _Newest first. Agent appends on every commit._

+- 2026-04-24: lua: conformance runner — `conformance.sh` shim + `conformance.py` (long-lived sx_server, epoch protocol, classify_error, writes scoreboard.{json,md}). 24 files classified in full run: 8 skip / 16 fail / 0 timeout.
 - 2026-04-24: lua: vendored PUC-Rio 5.1 test suite (lua5.1-tests.tar.gz from lua.org) to `lib/lua/lua-tests/` — 22 .lua files, 6304 lines; README kept for context.
 - 2026-04-24: lua: raw table access — fix `lua-set!` to use `dict-set!` (mutating), fix `lua-len` `has?`→`has-key?`, `#t` works, mutation/chained/computed-key writes + reference semantics. 224 total tests.
 - 2026-04-24: lua: phase 3 — table constructors verified (array, hash, computed keys, mixed, nested, dynamic values, fn values, sep variants). 205 total tests.