lua: conformance.sh + Python runner (writes scoreboard.{json,md})
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Has been cancelled
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Has been cancelled
This commit is contained in:
348
lib/lua/conformance.py
Executable file
348
lib/lua/conformance.py
Executable file
@@ -0,0 +1,348 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""lua-conformance — run the PUC-Rio Lua 5.1 test suite against Lua-on-SX.
|
||||||
|
|
||||||
|
Walks lib/lua/lua-tests/*.lua, evaluates each via `lua-eval-ast` on a
|
||||||
|
long-lived sx_server.exe subprocess, classifies pass/fail/timeout per file,
|
||||||
|
and writes lib/lua/scoreboard.{json,md}.
|
||||||
|
|
||||||
|
Modelled on lib/js/test262-runner.py but much simpler: each Lua test file is
|
||||||
|
its own unit (they're self-contained assertion scripts; they pass if they
|
||||||
|
complete without raising). No harness stub, no frontmatter, no worker pool.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python3 lib/lua/conformance.py
|
||||||
|
python3 lib/lua/conformance.py --filter locals
|
||||||
|
python3 lib/lua/conformance.py --per-test-timeout 3 -v
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import select
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
from collections import Counter
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Repo root: this file lives at <repo>/lib/lua/conformance.py, two levels down.
REPO = Path(__file__).resolve().parents[2]
# Preferred server binary is the in-repo dune build output.
SX_SERVER_PRIMARY = REPO / "hosts" / "ocaml" / "_build" / "default" / "bin" / "sx_server.exe"
# Absolute fallback used when the in-repo build is absent.
SX_SERVER_FALLBACK = Path("/root/rose-ash/hosts/ocaml/_build/default/bin/sx_server.exe")
# Directory of vendored PUC-Rio Lua test files (*.lua).
TESTS_DIR = REPO / "lib" / "lua" / "lua-tests"

# Per-test timeout in seconds (overridable via --per-test-timeout).
DEFAULT_TIMEOUT = 8.0

# Files that require facilities we don't (and won't soon) support.
# Still classified as skip rather than fail so the scoreboard stays honest.
HARDCODED_SKIP = {
    "all.lua": "driver uses dofile to chain other tests",
    "api.lua": "requires testC (C debug library)",
    "checktable.lua": "internal debug helpers",
    "code.lua": "bytecode inspection via debug library",
    "db.lua": "debug library",
    "files.lua": "io library",
    "gc.lua": "collectgarbage / finalisers",
    "main.lua": "standalone interpreter driver",
}

# sx_server reply lines, matched in Session._run:
#   (ok <epoch> <value>)    — inline success result
#   (ok-len <epoch> <len>)  — success; the value follows on the next line
#   (error <epoch> <msg>)   — evaluation error
RX_OK_INLINE = re.compile(r"^\(ok (\d+) (.*)\)\s*$")
RX_OK_LEN = re.compile(r"^\(ok-len (\d+) \d+\)\s*$")
RX_ERR = re.compile(r"^\(error (\d+) (.*)\)\s*$")
|
||||||
|
|
||||||
|
|
||||||
|
def pick_sx_server() -> Path:
    """Return the sx_server binary path: the in-repo build when present,
    otherwise the hard-coded fallback location."""
    return SX_SERVER_PRIMARY if SX_SERVER_PRIMARY.exists() else SX_SERVER_FALLBACK
|
||||||
|
|
||||||
|
|
||||||
|
def sx_escape_nested(s: str) -> str:
    """Two-level escape: (eval "(lua-eval-ast \"<src>\")").

    Outer literal is consumed by `eval` then the inner literal by `lua-eval-ast`.
    """
    # First level: make `s` a valid string literal for `lua-eval-ast`.
    # Backslash must be escaped before the sequences that introduce one.
    once = s
    for raw, escaped in (
        ("\\", "\\\\"),
        ('"', '\\"'),
        ("\n", "\\n"),
        ("\r", "\\r"),
        ("\t", "\\t"),
    ):
        once = once.replace(raw, escaped)
    # Second level: re-escape for the outer `eval` literal.
    return once.replace("\\", "\\\\").replace('"', '\\"')
|
||||||
|
|
||||||
|
|
||||||
|
def classify_error(msg: str) -> str:
    """Map a raw sx_server error payload to a short failure-mode bucket.

    Used to aggregate failures in the scoreboard; buckets are matched in
    priority order, and anything unrecognised falls through to a truncated
    "other: ..." bucket.
    """
    m = msg.lower()
    # Named undefined symbol gets its own bucket so related failures group.
    sym = re.search(r"undefined symbol:\s*\\?\"?([^\"\s)]+)", msg, re.I)
    if sym:
        return f"undefined symbol: {sym.group(1).strip(chr(34))}"
    # Ordered substring → bucket table; first match wins.
    # (The old `"nth: index out" in m or "nth:" in m` check was redundant:
    # the second clause subsumes the first, so only "nth:" is kept.)
    _BUCKETS = (
        ("undefined symbol", "undefined symbol"),
        ("lua: arith", "arith type error"),
        ("lua-transpile", "transpile: unsupported node"),
        ("lua-parse", "parse error"),
        ("lua-tokenize", "tokenize error"),
        ("unknown node", "unknown AST node"),
        ("not yet supported", "not yet supported"),
        ("nth:", "nth index error"),
        ("timeout", "timeout"),
    )
    for needle, label in _BUCKETS:
        if needle in m:
            return label
    # Strip SX-side wrapping and trim; cap length so buckets stay readable.
    trimmed = msg.strip('"').strip()
    return f"other: {trimmed[:80]}"
|
||||||
|
|
||||||
|
|
||||||
|
class Session:
    """A long-lived sx_server subprocess speaking the epoch line protocol.

    Each request is framed as `(epoch N)\\n<command>\\n`; replies are matched
    against RX_OK_INLINE / RX_OK_LEN / RX_ERR with the same epoch number.
    stdout is read non-blocking via select() so per-request timeouts work.
    """

    def __init__(self, sx_server: Path, timeout: float):
        self.sx_server = sx_server
        self.timeout = timeout          # per-request timeout for run_lua()
        self.proc: subprocess.Popen | None = None
        self._buf = b""                 # unconsumed stdout bytes
        self._fd = -1                   # stdout file descriptor for select()

    def start(self) -> None:
        """Spawn sx_server, wait for its (ready) banner, and load the Lua
        pipeline (tokenizer, parser, runtime, transpiler) as epochs 1-4."""
        self.proc = subprocess.Popen(
            [str(self.sx_server)],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            cwd=str(REPO),              # load paths below are repo-relative
            bufsize=0,                  # unbuffered: replies arrive promptly
        )
        self._fd = self.proc.stdout.fileno()
        self._buf = b""
        # Non-blocking reads let _readline enforce deadlines with select().
        os.set_blocking(self._fd, False)
        self._wait_for("(ready)", timeout=15.0)
        # Bootstrap loads get a generous 60s each (they parse whole files).
        self._run(1, '(load "lib/lua/tokenizer.sx")', 60)
        self._run(2, '(load "lib/lua/parser.sx")', 60)
        self._run(3, '(load "lib/lua/runtime.sx")', 60)
        self._run(4, '(load "lib/lua/transpile.sx")', 60)

    def stop(self) -> None:
        """Shut the subprocess down; best-effort, never raises. Safe to call
        when already stopped."""
        if self.proc is None:
            return
        try:
            self.proc.stdin.close()
        except Exception:
            pass
        try:
            self.proc.terminate()
            self.proc.wait(timeout=3)
        except Exception:
            # terminate/wait failed or timed out — escalate to SIGKILL.
            try:
                self.proc.kill()
            except Exception:
                pass
        self.proc = None

    def _readline(self, timeout: float) -> str | None:
        """Return the next stdout line (newline included) within `timeout`
        seconds. Returns None on EOF/closed pipe; raises TimeoutError on
        deadline expiry."""
        deadline = time.monotonic() + timeout
        while True:
            # Serve a complete line from the buffer first, if one exists.
            nl = self._buf.find(b"\n")
            if nl >= 0:
                line = self._buf[: nl + 1]
                self._buf = self._buf[nl + 1 :]
                return line.decode("utf-8", errors="replace")
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                raise TimeoutError("readline timeout")
            try:
                rlist, _, _ = select.select([self._fd], [], [], remaining)
            except (OSError, ValueError):
                # fd closed underneath us — treat as EOF.
                return None
            if not rlist:
                raise TimeoutError("readline timeout")
            try:
                chunk = os.read(self._fd, 65536)
            except (BlockingIOError, InterruptedError):
                # Spurious wakeup — go around and select() again.
                continue
            except OSError:
                return None
            if not chunk:
                # EOF: flush any trailing partial line before signalling None.
                if self._buf:
                    rv = self._buf.decode("utf-8", errors="replace")
                    self._buf = b""
                    return rv
                return None
            self._buf += chunk

    def _wait_for(self, token: str, timeout: float) -> None:
        """Consume stdout lines until one contains `token` or `timeout`
        elapses. Raises RuntimeError on EOF, TimeoutError on expiry."""
        start = time.monotonic()
        while time.monotonic() - start < timeout:
            line = self._readline(timeout - (time.monotonic() - start))
            if line is None:
                raise RuntimeError("sx_server closed stdout before ready")
            if token in line:
                return
        raise TimeoutError(f"timeout waiting for {token}")

    def _run(self, epoch: int, cmd: str, timeout: float) -> tuple[str, str]:
        """Send `cmd` under `epoch` and wait for its reply.

        Returns ("ok", value) or ("error", message). Lines for other epochs
        (or non-protocol output) are skipped. Raises TimeoutError on deadline,
        RuntimeError when the server closes a pipe mid-request.
        """
        payload = f"(epoch {epoch})\n{cmd}\n".encode("utf-8")
        try:
            self.proc.stdin.write(payload)
            self.proc.stdin.flush()
        except (BrokenPipeError, OSError):
            raise RuntimeError("sx_server stdin closed")
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                raise TimeoutError(f"epoch {epoch} timeout")
            line = self._readline(remaining)
            if line is None:
                raise RuntimeError("sx_server closed stdout mid-epoch")
            m = RX_OK_INLINE.match(line)
            if m and int(m.group(1)) == epoch:
                return "ok", m.group(2)
            m = RX_OK_LEN.match(line)
            if m and int(m.group(1)) == epoch:
                # ok-len: the actual value arrives on the following line.
                val = self._readline(deadline - time.monotonic()) or ""
                return "ok", val.rstrip("\n")
            m = RX_ERR.match(line)
            if m and int(m.group(1)) == epoch:
                return "error", m.group(2)
        raise TimeoutError(f"epoch {epoch} timeout")

    def run_lua(self, epoch: int, src: str) -> tuple[str, str]:
        """Evaluate Lua source `src` via lua-eval-ast under `epoch`, using the
        session's per-test timeout. Returns ("ok"|"error", payload)."""
        escaped = sx_escape_nested(src)
        cmd = f'(eval "(lua-eval-ast \\"{escaped}\\")")'
        return self._run(epoch, cmd, self.timeout)
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> int:
    """Run the vendored Lua suite against sx_server and report results.

    Returns process exit status: 0 only when every runnable test passes
    (skips allowed), 1 on any fail/timeout or setup error. Unless
    --no-scoreboard is given, also writes lib/lua/scoreboard.{json,md}.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--per-test-timeout", type=float, default=DEFAULT_TIMEOUT)
    ap.add_argument("--filter", type=str, default=None,
                    help="only run tests whose filename contains this substring")
    ap.add_argument("-v", "--verbose", action="store_true")
    ap.add_argument("--no-scoreboard", action="store_true",
                    help="do not write scoreboard.{json,md}")
    args = ap.parse_args()

    # Fail fast on missing prerequisites before spawning anything.
    sx_server = pick_sx_server()
    if not sx_server.exists():
        print(f"ERROR: sx_server not found at {sx_server}", file=sys.stderr)
        return 1
    if not TESTS_DIR.exists():
        print(f"ERROR: no tests dir at {TESTS_DIR}", file=sys.stderr)
        return 1

    # Sorted for a deterministic run order and stable scoreboard diffs.
    tests = sorted(TESTS_DIR.glob("*.lua"))
    if args.filter:
        tests = [p for p in tests if args.filter in p.name]
    if not tests:
        print("No tests matched.", file=sys.stderr)
        return 1

    print(f"Running {len(tests)} Lua test file(s)…", file=sys.stderr)
    session = Session(sx_server, args.per_test_timeout)
    session.start()

    results = []                      # one dict per test: name/status/reason/ms
    failure_modes: Counter = Counter()  # classify_error bucket → count

    try:
        for i, path in enumerate(tests, start=1):
            name = path.name
            skip_reason = HARDCODED_SKIP.get(name)
            if skip_reason:
                results.append({"name": name, "status": "skip", "reason": skip_reason, "ms": 0})
                if args.verbose:
                    print(f" - {name}: SKIP ({skip_reason})")
                continue

            # A few suite files are not valid UTF-8; latin-1 never fails.
            try:
                src = path.read_text(encoding="utf-8")
            except UnicodeDecodeError:
                src = path.read_text(encoding="latin-1")
            t0 = time.monotonic()
            try:
                # Epochs 100+i keep test requests clear of the bootstrap
                # epochs (1-4) used by Session.start().
                kind, payload = session.run_lua(100 + i, src)
                ms = int((time.monotonic() - t0) * 1000)
                if kind == "ok":
                    # A test passes by completing without raising.
                    results.append({"name": name, "status": "pass", "reason": "", "ms": ms})
                    if args.verbose:
                        print(f" + {name}: PASS ({ms}ms)")
                else:
                    reason = classify_error(payload)
                    failure_modes[reason] += 1
                    results.append({"name": name, "status": "fail", "reason": reason, "ms": ms})
                    if args.verbose:
                        print(f" - {name}: FAIL — {reason}")
            except TimeoutError:
                ms = int((time.monotonic() - t0) * 1000)
                failure_modes["timeout"] += 1
                results.append({"name": name, "status": "timeout", "reason": "per-test timeout",
                                "ms": ms})
                if args.verbose:
                    print(f" - {name}: TIMEOUT ({ms}ms)")
                # Restart after a timeout to shed any stuck state.
                # NOTE(review): a RuntimeError from a dead server is not
                # caught here and aborts the whole run — confirm intended.
                session.stop()
                session.start()
    finally:
        session.stop()

    # Summary tallies; "runnable" excludes hard-coded skips.
    n_pass = sum(1 for r in results if r["status"] == "pass")
    n_fail = sum(1 for r in results if r["status"] == "fail")
    n_timeout = sum(1 for r in results if r["status"] == "timeout")
    n_skip = sum(1 for r in results if r["status"] == "skip")
    n_total = len(results)
    n_runnable = n_total - n_skip
    pct = (n_pass / n_runnable * 100.0) if n_runnable else 0.0

    print()
    print(f"Lua-on-SX conformance: {n_pass}/{n_runnable} runnable pass ({pct:.1f}%) "
          f"fail={n_fail} timeout={n_timeout} skip={n_skip} total={n_total}")
    if failure_modes:
        print("Top failure modes:")
        for mode, count in failure_modes.most_common(10):
            print(f" {count}x {mode}")

    if not args.no_scoreboard:
        # Machine-readable scoreboard.
        sb = {
            "totals": {
                "pass": n_pass, "fail": n_fail, "timeout": n_timeout,
                "skip": n_skip, "total": n_total, "runnable": n_runnable,
                "pass_rate": round(pct, 1),
            },
            "top_failure_modes": failure_modes.most_common(20),
            "results": results,
        }
        (REPO / "lib" / "lua" / "scoreboard.json").write_text(
            json.dumps(sb, indent=2), encoding="utf-8"
        )
        # Human-readable markdown companion.
        md = [
            "# Lua-on-SX conformance scoreboard",
            "",
            f"**Pass rate:** {n_pass}/{n_runnable} runnable ({pct:.1f}%)",
            f"fail={n_fail} timeout={n_timeout} skip={n_skip} total={n_total}",
            "",
            "## Top failure modes",
            "",
        ]
        for mode, count in failure_modes.most_common(10):
            md.append(f"- **{count}x** {mode}")
        md.extend(["", "## Per-test results", "",
                   "| Test | Status | Reason | ms |",
                   "|---|---|---|---:|"])
        for r in results:
            reason = r["reason"] or "-"
            md.append(f"| {r['name']} | {r['status']} | {reason} | {r['ms']} |")
        (REPO / "lib" / "lua" / "scoreboard.md").write_text(
            "\n".join(md) + "\n", encoding="utf-8"
        )

    return 0 if (n_fail == 0 and n_timeout == 0) else 1
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Propagate main()'s status: 0 = all runnable tests passed, 1 otherwise.
    sys.exit(main())
|
||||||
13
lib/lua/conformance.sh
Executable file
13
lib/lua/conformance.sh
Executable file
@@ -0,0 +1,13 @@
|
|||||||
|
#!/usr/bin/env bash
# Lua-on-SX conformance runner — walks lib/lua/lua-tests/*.lua, runs each via
# `lua-eval-ast` on a long-lived sx_server.exe subprocess, classifies
# pass/fail/timeout, and writes lib/lua/scoreboard.{json,md}.
#
# Usage:
#   bash lib/lua/conformance.sh                 # full suite
#   bash lib/lua/conformance.sh --filter sort   # filter by filename substring
#   bash lib/lua/conformance.sh -v              # per-file verbose

# -e added: without it a failed `git rev-parse` (run outside a repo) would not
# abort, and the script would exec the runner from the wrong directory.
set -euo pipefail
cd "$(git rev-parse --show-toplevel)"
exec python3 lib/lua/conformance.py "$@"
|
||||||
@@ -56,7 +56,7 @@ Each item: implement → tests → tick box → update progress log.
|
|||||||
- [x] Table constructors (array + hash + computed keys)
|
- [x] Table constructors (array + hash + computed keys)
|
||||||
- [x] Raw table access `t.k` / `t[k]` (no metatables yet)
|
- [x] Raw table access `t.k` / `t[k]` (no metatables yet)
|
||||||
- [x] Vendor PUC-Rio 5.1.5 suite to `lib/lua/lua-tests/` (just `.lua` files)
|
- [x] Vendor PUC-Rio 5.1.5 suite to `lib/lua/lua-tests/` (just `.lua` files)
|
||||||
- [ ] `lib/lua/conformance.sh` + Python runner (model on `lib/js/test262-runner.py`)
|
- [x] `lib/lua/conformance.sh` + Python runner (model on `lib/js/test262-runner.py`)
|
||||||
- [ ] `scoreboard.json` + `scoreboard.md` baseline
|
- [ ] `scoreboard.json` + `scoreboard.md` baseline
|
||||||
|
|
||||||
### Phase 4 — metatables + error handling (next run)
|
### Phase 4 — metatables + error handling (next run)
|
||||||
@@ -82,6 +82,7 @@ Each item: implement → tests → tick box → update progress log.
|
|||||||
|
|
||||||
_Newest first. Agent appends on every commit._
|
_Newest first. Agent appends on every commit._
|
||||||
|
|
||||||
|
- 2026-04-24: lua: conformance runner — `conformance.sh` shim + `conformance.py` (long-lived sx_server, epoch protocol, classify_error, writes scoreboard.{json,md}). 24 files classified in full run: 8 skip / 16 fail / 0 timeout.
|
||||||
- 2026-04-24: lua: vendored PUC-Rio 5.1 test suite (lua5.1-tests.tar.gz from lua.org) to `lib/lua/lua-tests/` — 22 .lua files, 6304 lines; README kept for context.
|
- 2026-04-24: lua: vendored PUC-Rio 5.1 test suite (lua5.1-tests.tar.gz from lua.org) to `lib/lua/lua-tests/` — 22 .lua files, 6304 lines; README kept for context.
|
||||||
- 2026-04-24: lua: raw table access — fix `lua-set!` to use `dict-set!` (mutating), fix `lua-len` `has?`→`has-key?`, `#t` works, mutation/chained/computed-key writes + reference semantics. 224 total tests.
|
- 2026-04-24: lua: raw table access — fix `lua-set!` to use `dict-set!` (mutating), fix `lua-len` `has?`→`has-key?`, `#t` works, mutation/chained/computed-key writes + reference semantics. 224 total tests.
|
||||||
- 2026-04-24: lua: phase 3 — table constructors verified (array, hash, computed keys, mixed, nested, dynamic values, fn values, sep variants). 205 total tests.
|
- 2026-04-24: lua: phase 3 — table constructors verified (array, hash, computed keys, mixed, nested, dynamic values, fn values, sep variants). 205 total tests.
|
||||||
|
|||||||
Reference in New Issue
Block a user