forth: Hayes conformance runner + baseline scoreboard (165/590, 28%)
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Has been cancelled
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Has been cancelled
This commit is contained in:
169
lib/forth/conformance.sh
Executable file
169
lib/forth/conformance.sh
Executable file
@@ -0,0 +1,169 @@
|
||||
#!/usr/bin/env bash
|
||||
# Run the Hayes/Gerry-Jackson Core conformance suite against our Forth
|
||||
# interpreter and emit scoreboard.json + scoreboard.md.
|
||||
#
|
||||
# Method:
|
||||
# 1. Preprocess lib/forth/ans-tests/core.fr — strip \ comments, ( ... )
|
||||
# comments, and TESTING … metadata lines.
|
||||
# 2. Split into chunks ending at each `}T` so an error in one test
|
||||
# chunk doesn't abort the run.
|
||||
# 3. Emit an SX file that exposes those chunks as a list.
|
||||
# 4. Run our Forth + hayes-runner under sx_server; record pass/fail/error.
|
||||
|
||||
set -e
|
||||
FORTH_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
ROOT="$(cd "$FORTH_DIR/../.." && pwd)"
|
||||
SX_SERVER="${SX_SERVER:-/root/rose-ash/hosts/ocaml/_build/default/bin/sx_server.exe}"
|
||||
SOURCE="$FORTH_DIR/ans-tests/core.fr"
|
||||
OUT_JSON="$FORTH_DIR/scoreboard.json"
|
||||
OUT_MD="$FORTH_DIR/scoreboard.md"
|
||||
TMP="$(mktemp -d)"
|
||||
PREPROC="$TMP/preproc.forth"
|
||||
CHUNKS_SX="$TMP/chunks.sx"
|
||||
|
||||
cd "$ROOT"
|
||||
|
||||
# 1. preprocess
|
||||
awk '
|
||||
{
|
||||
line = $0
|
||||
# strip leading/embedded \ line comments (must be \ followed by space or EOL)
|
||||
gsub(/(^|[ \t])\\([ \t].*|$)/, " ", line)
|
||||
# strip ( ... ) block comments that sit on one line
|
||||
gsub(/\([^)]*\)/, " ", line)
|
||||
# strip TESTING … metadata lines (rest of line, incl. bare TESTING)
|
||||
sub(/TESTING([ \t].*)?$/, " ", line)
|
||||
print line
|
||||
}' "$SOURCE" > "$PREPROC"
|
||||
|
||||
# 2 + 3: split into chunks at each `}T` and emit as a SX file
|
||||
#
|
||||
# Cap chunks via MAX_CHUNKS env (default 590) — a small number of later
|
||||
# tests enter infinite runtime loops (e.g. COUNT-BITS with unsigned wrap)
|
||||
# that our bignum-based interpreter can't terminate. Raise the cap as
|
||||
# those tests unblock.
|
||||
MAX_CHUNKS="${MAX_CHUNKS:-590}"
|
||||
|
||||
MAX_CHUNKS="$MAX_CHUNKS" python3 - "$PREPROC" "$CHUNKS_SX" <<'PY'
|
||||
import os, re, sys
|
||||
preproc_path, out_path = sys.argv[1], sys.argv[2]
|
||||
max_chunks = int(os.environ.get("MAX_CHUNKS", "590"))
|
||||
text = open(preproc_path).read()
|
||||
# keep the `}T` attached to the preceding chunk
|
||||
parts = re.split(r'(\}T)', text)
|
||||
chunks = []
|
||||
buf = ""
|
||||
for p in parts:
|
||||
buf += p
|
||||
if p == "}T":
|
||||
s = buf.strip()
|
||||
if s:
|
||||
chunks.append(s)
|
||||
buf = ""
|
||||
if buf.strip():
|
||||
chunks.append(buf.strip())
|
||||
chunks = chunks[:max_chunks]
|
||||
|
||||
def esc(s):
|
||||
s = s.replace('\\', '\\\\').replace('"', '\\"')
|
||||
s = s.replace('\r', ' ').replace('\n', ' ')
|
||||
s = re.sub(r'\s+', ' ', s).strip()
|
||||
return s
|
||||
|
||||
with open(out_path, "w") as f:
|
||||
f.write("(define hayes-chunks (list\n")
|
||||
for c in chunks:
|
||||
f.write(' "' + esc(c) + '"\n')
|
||||
f.write("))\n\n")
|
||||
f.write("(define\n")
|
||||
f.write(" hayes-run-all\n")
|
||||
f.write(" (fn\n")
|
||||
f.write(" ()\n")
|
||||
f.write(" (hayes-reset!)\n")
|
||||
f.write(" (let ((s (hayes-boot)))\n")
|
||||
f.write(" (for-each (fn (c) (hayes-run-chunk s c)) hayes-chunks))\n")
|
||||
f.write(" (hayes-summary)))\n")
|
||||
PY
|
||||
|
||||
# 4. run it
|
||||
OUT=$(printf '(epoch 1)\n(load "lib/forth/runtime.sx")\n(epoch 2)\n(load "lib/forth/reader.sx")\n(epoch 3)\n(load "lib/forth/interpreter.sx")\n(epoch 4)\n(load "lib/forth/compiler.sx")\n(epoch 5)\n(load "lib/forth/hayes-runner.sx")\n(epoch 6)\n(load "%s")\n(epoch 7)\n(eval "(hayes-run-all)")\n' "$CHUNKS_SX" \
|
||||
| timeout 180 "$SX_SERVER" 2>&1)
|
||||
STATUS=$?
|
||||
|
||||
SUMMARY=$(printf '%s\n' "$OUT" | awk '/^\{:pass / {print; exit}')
|
||||
PASS=$(printf '%s' "$SUMMARY" | sed -n 's/.*:pass \([0-9-]*\).*/\1/p')
|
||||
FAIL=$(printf '%s' "$SUMMARY" | sed -n 's/.*:fail \([0-9-]*\).*/\1/p')
|
||||
ERR=$(printf '%s' "$SUMMARY" | sed -n 's/.*:error \([0-9-]*\).*/\1/p')
|
||||
TOTAL=$(printf '%s' "$SUMMARY" | sed -n 's/.*:total \([0-9-]*\).*/\1/p')
|
||||
CHUNK_COUNT=$(grep -c '^ "' "$CHUNKS_SX" || echo 0)
|
||||
TOTAL_AVAILABLE=$(grep -c '}T' "$PREPROC" || echo 0)
|
||||
|
||||
NOW="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
|
||||
|
||||
if [ -z "$PASS" ]; then
|
||||
PASS=0; FAIL=0; ERR=0; TOTAL=0
|
||||
NOTE="runner halted before completing (timeout or SX error)"
|
||||
else
|
||||
NOTE="completed"
|
||||
fi
|
||||
|
||||
PCT=0
|
||||
if [ "$TOTAL" -gt 0 ]; then
|
||||
PCT=$((PASS * 100 / TOTAL))
|
||||
fi
|
||||
|
||||
cat > "$OUT_JSON" <<JSON
|
||||
{
|
||||
"source": "gerryjackson/forth2012-test-suite src/core.fr",
|
||||
"generated_at": "$NOW",
|
||||
"chunks_available": $TOTAL_AVAILABLE,
|
||||
"chunks_fed": $CHUNK_COUNT,
|
||||
"total": $TOTAL,
|
||||
"pass": $PASS,
|
||||
"fail": $FAIL,
|
||||
"error": $ERR,
|
||||
"percent": $PCT,
|
||||
"note": "$NOTE"
|
||||
}
|
||||
JSON
|
||||
|
||||
cat > "$OUT_MD" <<MD
|
||||
# Forth Hayes Core scoreboard
|
||||
|
||||
| metric | value |
|
||||
| ----------------- | ----: |
|
||||
| chunks available | $TOTAL_AVAILABLE |
|
||||
| chunks fed | $CHUNK_COUNT |
|
||||
| total | $TOTAL |
|
||||
| pass | $PASS |
|
||||
| fail | $FAIL |
|
||||
| error | $ERR |
|
||||
| percent | ${PCT}% |
|
||||
|
||||
- **Source**: \`gerryjackson/forth2012-test-suite\` \`src/core.fr\`
|
||||
- **Generated**: $NOW
|
||||
- **Note**: $NOTE
|
||||
|
||||
A "chunk" is any preprocessed segment ending at a \`}T\` (every Hayes test
|
||||
is one chunk, plus the small declaration blocks between tests).
|
||||
The runner catches raised errors at chunk boundaries so one bad chunk
|
||||
does not abort the rest. \`error\` covers chunks that raised; \`fail\`
|
||||
covers tests whose \`->\` / \`}T\` comparison mismatched.
|
||||
|
||||
### Chunk cap
|
||||
|
||||
\`conformance.sh\` processes the first \`\$MAX_CHUNKS\` chunks (default
|
||||
**590**). Past that, \`core.fr\` ships tests that rely on unsigned
|
||||
integer wrap-around (e.g. \`COUNT-BITS\` using \`BEGIN DUP WHILE … 2*
|
||||
REPEAT\`), which never terminates on our bignum-based interpreter. The
|
||||
cap should rise as those tests unblock — run with \`MAX_CHUNKS=639
|
||||
./conformance.sh\` once they do.
|
||||
MD
|
||||
|
||||
echo "$SUMMARY"
|
||||
echo "Scoreboard: $OUT_JSON"
|
||||
echo " $OUT_MD"
|
||||
|
||||
if [ "$STATUS" -ne 0 ] && [ "$TOTAL" -eq 0 ]; then
|
||||
exit 1
|
||||
fi
|
||||
Reference in New Issue
Block a user