forth: Hayes conformance runner + baseline scoreboard (165/590, 28%)
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Has been cancelled
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Has been cancelled
This commit is contained in:
169
lib/forth/conformance.sh
Executable file
169
lib/forth/conformance.sh
Executable file
@@ -0,0 +1,169 @@
|
|||||||
|
#!/usr/bin/env bash
# Run the Hayes/Gerry-Jackson Core conformance suite against our Forth
# interpreter and emit scoreboard.json + scoreboard.md.
#
# Method:
#   1. Preprocess lib/forth/ans-tests/core.fr — strip \ comments, ( ... )
#      comments, and TESTING … metadata lines.
#   2. Split into chunks ending at each `}T` so an error in one test
#      chunk doesn't abort the run.
#   3. Emit an SX file that exposes those chunks as a list.
#   4. Run our Forth + hayes-runner under sx_server; record pass/fail/error.
#
# Environment:
#   SX_SERVER   path to the sx_server executable (default set below)

# NOTE: pipefail is intentionally not enabled. Later in this script an awk
# stage exits early on purpose, which can SIGPIPE its producer.
set -e

# Directory holding this script, and the repository root two levels up.
FORTH_DIR="$(cd "$(dirname "$0")" && pwd)"
ROOT="$(cd "$FORTH_DIR/../.." && pwd)"

SX_SERVER="${SX_SERVER:-/root/rose-ash/hosts/ocaml/_build/default/bin/sx_server.exe}"
SOURCE="$FORTH_DIR/ans-tests/core.fr"
OUT_JSON="$FORTH_DIR/scoreboard.json"
OUT_MD="$FORTH_DIR/scoreboard.md"

# Scratch space for the preprocessed source and the generated SX driver.
TMP="$(mktemp -d)"

# Remove the scratch directory on every exit path (success, failure, set -e).
cleanup_tmp() {
  rm -rf -- "${TMP:?}"
}
trap cleanup_tmp EXIT

PREPROC="$TMP/preproc.forth"
CHUNKS_SX="$TMP/chunks.sx"

# The sx_server load paths used later are relative to the repository root.
cd "$ROOT"
|
||||||
|
|
||||||
|
# 1. preprocess
#
# preprocess_forth: filter Forth source from stdin to stdout, line by line.
#   - `\ ...` line comments are removed (the backslash must be its own word:
#     preceded by start-of-line/whitespace and followed by whitespace/EOL).
#   - `( ... )` comments that open and close on one line are removed. The
#     opening paren must be a standalone word (whitespace on both sides), so
#     words that merely contain a paren, such as `.(`, are left untouched
#     instead of being cut down to a different word.
#   - `TESTING ...` metadata is removed to end of line. TESTING must start a
#     word, so identifiers that merely end in TESTING are not truncated.
# Every removed span is replaced by a single space so neighbouring words
# never fuse together.
preprocess_forth() {
  awk '
  {
    line = $0
    # backslash comments run to end of line
    gsub(/(^|[ \t])\\([ \t].*|$)/, " ", line)
    # single-line paren comments, with the paren as a standalone word
    gsub(/(^|[ \t])\([ \t][^)]*\)/, " ", line)
    # TESTING metadata (rest of line, including a bare TESTING)
    sub(/(^|[ \t])TESTING([ \t].*)?$/, " ", line)
    print line
  }'
}

preprocess_forth < "$SOURCE" > "$PREPROC"
|
||||||
|
|
||||||
|
# 2 + 3: split into chunks at each `}T` and emit as a SX file
#
# Cap chunks via MAX_CHUNKS env (default 590) — a small number of later
# tests enter infinite runtime loops (e.g. COUNT-BITS with unsigned wrap)
# that our bignum-based interpreter can't terminate. Raise the cap as
# those tests unblock.
MAX_CHUNKS="${MAX_CHUNKS:-590}"

MAX_CHUNKS="$MAX_CHUNKS" python3 - "$PREPROC" "$CHUNKS_SX" <<'PY'
import os
import re
import sys

src_path, dest_path = sys.argv[1], sys.argv[2]
limit = int(os.environ.get("MAX_CHUNKS", "590"))

with open(src_path) as src:
    source_text = src.read()

# Splitting on a captured delimiter yields body, "}T", body, "}T", ..., tail.
# Glue each body to the `}T` that closes it; whatever follows the final `}T`
# becomes one trailing chunk if it is not blank.
pieces = re.split(r'(\}T)', source_text)
chunks = [
    (pieces[i] + pieces[i + 1]).strip()
    for i in range(0, len(pieces) - 1, 2)
]
tail = pieces[-1].strip()
if tail:
    chunks.append(tail)
chunks = chunks[:limit]


def esc(raw):
    """Render a chunk as the inside of a one-line double-quoted string."""
    quoted = raw.replace('\\', '\\\\').replace('"', '\\"')
    flat = quoted.replace('\r', ' ').replace('\n', ' ')
    return re.sub(r'\s+', ' ', flat).strip()


# Driver appended after the chunk list: reset counters, boot one shared
# Forth state, feed every chunk through it, then return the summary.
DRIVER = (
    "(define\n"
    " hayes-run-all\n"
    " (fn\n"
    " ()\n"
    " (hayes-reset!)\n"
    " (let ((s (hayes-boot)))\n"
    " (for-each (fn (c) (hayes-run-chunk s c)) hayes-chunks))\n"
    " (hayes-summary)))\n"
)

with open(dest_path, "w") as out:
    out.write("(define hayes-chunks (list\n")
    out.writelines(' "' + esc(chunk) + '"\n' for chunk in chunks)
    out.write("))\n\n")
    out.write(DRIVER)
PY
|
||||||
|
|
||||||
|
# 4. run it
#
# run_sx_server CHUNKS_FILE
#   Feed sx_server the command stream that loads the Forth implementation,
#   the Hayes runner and CHUNKS_FILE, then evaluates (hayes-run-all).
#   The server's stdout and stderr are both written to stdout.
# Globals:  SX_SERVER (read), SX_TIMEOUT (read; seconds, default 180)
# Returns:  the exit status of `timeout` / the server (124 on timeout).
run_sx_server() {
  local chunks_sx=$1
  {
    # printf re-applies the format to each (epoch, path) pair.
    printf '(epoch %d)\n(load "%s")\n' \
      1 "lib/forth/runtime.sx" \
      2 "lib/forth/reader.sx" \
      3 "lib/forth/interpreter.sx" \
      4 "lib/forth/compiler.sx" \
      5 "lib/forth/hayes-runner.sx" \
      6 "$chunks_sx"
    printf '(epoch 7)\n(eval "(hayes-run-all)")\n'
  } | timeout "${SX_TIMEOUT:-180}" "$SX_SERVER" 2>&1
}

# Capture the status with `||` rather than a following `$?`: under `set -e`
# a failing command substitution in a plain assignment aborts the script on
# the spot, which would skip the "runner halted" scoreboard entirely.
STATUS=0
OUT=$(run_sx_server "$CHUNKS_SX") || STATUS=$?
|
||||||
|
|
||||||
|
# summary_field SUMMARY_LINE NAME
#   Print the integer that follows `:NAME ` in the runner's summary line,
#   or nothing when the field is absent.
summary_field() {
  printf '%s' "$1" | sed -n "s/.*:$2 \([0-9-]*\).*/\1/p"
}

# count_fed_chunks CHUNKS_FILE
#   Print how many chunk string literals the generated SX file holds (lines
#   whose first non-blank character is a double quote). Always prints exactly
#   one number: `grep -c` already prints 0 when nothing matches (while exiting
#   1), so a `|| echo 0` fallback would emit a second "0" line.
count_fed_chunks() {
  local n
  n=$(grep -c '^[[:space:]]*"' "$1" 2>/dev/null) || n=${n:-0}
  printf '%s\n' "$n"
}

# count_test_terminators PREPROC_FILE
#   Print the number of `}T` occurrences. `grep -c` would count matching
#   lines, but the chunk splitter cuts at every occurrence, so a line holding
#   two tests must count twice.
count_test_terminators() {
  { grep -o '}T' "$1" 2>/dev/null || true; } | wc -l | tr -d '[:space:]'
}

# The summary is the first output line that starts with `{:pass `.
SUMMARY=$(printf '%s\n' "$OUT" | awk '/^\{:pass / {print; exit}')
PASS=$(summary_field "$SUMMARY" pass)
FAIL=$(summary_field "$SUMMARY" fail)
ERR=$(summary_field "$SUMMARY" error)
TOTAL=$(summary_field "$SUMMARY" total)
CHUNK_COUNT=$(count_fed_chunks "$CHUNKS_SX")
TOTAL_AVAILABLE=$(count_test_terminators "$PREPROC")

NOW="$(date -u +%Y-%m-%dT%H:%M:%SZ)"

if [ -z "$PASS" ]; then
  # No summary line at all: the server died or timed out before reporting.
  PASS=0; FAIL=0; ERR=0; TOTAL=0
  NOTE="runner halted before completing (timeout or SX error)"
else
  NOTE="completed"
  # Never let a partially parsed summary leave a numeric field empty; an
  # empty value would break both the arithmetic below and the JSON output.
  FAIL=${FAIL:-0}
  ERR=${ERR:-0}
  TOTAL=${TOTAL:-0}
fi

# Integer percentage, truncated toward zero (165/590 -> 27).
PCT=0
if [ "$TOTAL" -gt 0 ]; then
  PCT=$((PASS * 100 / TOTAL))
fi
|
||||||
|
|
||||||
|
# render_scoreboard_json: print the machine-readable scoreboard to stdout.
# Reads NOW, TOTAL_AVAILABLE, CHUNK_COUNT, TOTAL, PASS, FAIL, ERR, PCT, NOTE.
render_scoreboard_json() {
  cat <<JSON
{
"source": "gerryjackson/forth2012-test-suite src/core.fr",
"generated_at": "$NOW",
"chunks_available": $TOTAL_AVAILABLE,
"chunks_fed": $CHUNK_COUNT,
"total": $TOTAL,
"pass": $PASS,
"fail": $FAIL,
"error": $ERR,
"percent": $PCT,
"note": "$NOTE"
}
JSON
}

# render_scoreboard_md: print the human-readable scoreboard to stdout.
# Same inputs as the JSON renderer. Backticks and the literal $MAX_CHUNKS
# are backslash-escaped because the heredoc delimiter is unquoted.
render_scoreboard_md() {
  cat <<MD
# Forth Hayes Core scoreboard

| metric | value |
| ----------------- | ----: |
| chunks available | $TOTAL_AVAILABLE |
| chunks fed | $CHUNK_COUNT |
| total | $TOTAL |
| pass | $PASS |
| fail | $FAIL |
| error | $ERR |
| percent | ${PCT}% |

- **Source**: \`gerryjackson/forth2012-test-suite\` \`src/core.fr\`
- **Generated**: $NOW
- **Note**: $NOTE

A "chunk" is any preprocessed segment ending at a \`}T\` (every Hayes test
is one chunk, plus the small declaration blocks between tests).
The runner catches raised errors at chunk boundaries so one bad chunk
does not abort the rest. \`error\` covers chunks that raised; \`fail\`
covers tests whose \`->\` / \`}T\` comparison mismatched.

### Chunk cap

\`conformance.sh\` processes the first \`\$MAX_CHUNKS\` chunks (default
**590**). Past that, \`core.fr\` ships tests that rely on unsigned
integer wrap-around (e.g. \`COUNT-BITS\` using \`BEGIN DUP WHILE … 2*
REPEAT\`), which never terminates on our bignum-based interpreter. The
cap should rise as those tests unblock — run with \`MAX_CHUNKS=639
./conformance.sh\` once they do.
MD
}

render_scoreboard_json > "$OUT_JSON"
render_scoreboard_md > "$OUT_MD"
|
||||||
|
|
||||||
|
# Show the raw summary line and where the scoreboards were written.
printf '%s\n' "$SUMMARY"
printf 'Scoreboard: %s\n' "$OUT_JSON"
printf ' %s\n' "$OUT_MD"

# Fail the script only when the server exited non-zero AND produced no
# results; a non-zero exit with a usable summary still counts as a run.
if [ "$STATUS" -ne 0 ]; then
  if [ "$TOTAL" -eq 0 ]; then
    exit 1
  fi
fi
|
||||||
135
lib/forth/hayes-runner.sx
Normal file
135
lib/forth/hayes-runner.sx
Normal file
@@ -0,0 +1,135 @@
|
|||||||
|
;; Hayes conformance test runner.
;; Installs T{ -> }T as Forth primitives that snapshot and compare dstack,
;; plus stub TESTING / HEX / DECIMAL so the Hayes Core file can stream
;; through the interpreter without halting on unsupported metadata words.

;; Module-level counters and scratch state. All of them are restored to
;; these initial values by hayes-reset!.

;; Number of }T comparisons that matched.
(define hayes-pass 0)
;; Number of }T comparisons that did not match (or had no preceding ->).
(define hayes-fail 0)
;; Number of chunks whose evaluation raised and was caught by hayes-run-chunk.
(define hayes-error 0)
;; Data-stack depth recorded by the most recent T{.
(define hayes-start-depth 0)
;; Stack slice captured by the most recent ->.
(define hayes-actual (list))
;; True once -> has run since the last T{.
(define hayes-actual-set false)
;; One dict per failed comparison, appended in the order they occur.
(define hayes-failures (list))
;; String form of the first caught error; "" while none has occurred.
(define hayes-first-error "")
|
||||||
|
|
||||||
|
;; Put every tally and every piece of per-test scratch state back to its
;; initial value, so a fresh run starts from zero.
(define hayes-reset!
  (fn ()
    (set! hayes-pass 0)
    (set! hayes-fail 0)
    (set! hayes-error 0)
    (set! hayes-start-depth 0)
    (set! hayes-actual (list))
    (set! hayes-actual-set false)
    (set! hayes-failures (list))
    (set! hayes-first-error "")))
|
||||||
|
|
||||||
|
;; Return the part of the data stack that lies above depth `base`: the
;; first (depth - base) entries of the "dstack" list, or an empty list when
;; the stack is no deeper than `base`.
;; NOTE(review): this presumes the newest entries sit at the front of
;; "dstack" — confirm against the runtime's stack representation.
(define hayes-slice
  (fn (state base)
    (let ((extra (- (forth-depth state) base)))
      (if (> extra 0)
        (take (get state "dstack") extra)
        (list)))))
|
||||||
|
|
||||||
|
;; Shrink the data stack back to depth `base` by dropping the first
;; (depth - base) entries of "dstack". Does nothing when the stack is
;; already at or below `base`.
(define hayes-truncate!
  (fn (state base)
    (let ((extra (- (forth-depth state) base)))
      (when (> extra 0)
        (dict-set! state "dstack" (drop (get state "dstack") extra))))))
|
||||||
|
|
||||||
|
;; Register the Hayes tester words on `state` and return `state`.
;;   T{       remember the current stack depth and clear any previous capture
;;   ->       capture everything above that depth, then drop it from the stack
;;   }T       capture the stack again, drop it, and compare both captures;
;;            counts a pass only when -> ran and the two captures are equal,
;;            otherwise counts a fail and records both captures as strings
;;   TESTING  no-op stub
;;   HEX / DECIMAL  set the state's "base" to 16 / 10
(define hayes-install!
  (fn (state)
    (forth-def-prim! state "T{"
      (fn (st)
        (set! hayes-start-depth (forth-depth st))
        (set! hayes-actual-set false)
        (set! hayes-actual (list))))
    (forth-def-prim! state "->"
      (fn (st)
        (set! hayes-actual (hayes-slice st hayes-start-depth))
        (set! hayes-actual-set true)
        (hayes-truncate! st hayes-start-depth)))
    (forth-def-prim! state "}T"
      (fn (st)
        (let ((want (hayes-slice st hayes-start-depth)))
          (hayes-truncate! st hayes-start-depth)
          (if (and hayes-actual-set (= want hayes-actual))
            (set! hayes-pass (+ hayes-pass 1))
            (begin
              (set! hayes-fail (+ hayes-fail 1))
              (set! hayes-failures
                (concat hayes-failures
                  (list
                    (dict
                      "kind" "fail"
                      "expected" (str want)
                      "actual" (str hayes-actual))))))))))
    (forth-def-prim! state "TESTING" (fn (st) nil))
    (forth-def-prim! state "HEX" (fn (st) (dict-set! st "base" 16)))
    (forth-def-prim! state "DECIMAL" (fn (st) (dict-set! st "base" 10)))
    state))
|
||||||
|
|
||||||
|
;; Boot a Forth state via forth-boot, install the Hayes tester words on it,
;; zero the tallies, and return the state.
(define hayes-boot
  (fn ()
    (let ((booted (forth-boot)))
      (hayes-install! booted)
      (hayes-reset!)
      booted)))
|
||||||
|
|
||||||
|
;; Interpret one preprocessed chunk (a string of Forth source) on the shared
;; state. Anything raised while interpreting is caught here: the error tally
;; is bumped, the first error's string form is kept for the summary, and the
;; stacks, compile flag, current definition, control stack and pending input
;; are cleared so the next chunk starts clean.
(define hayes-run-chunk
  (fn (state src)
    (guard
      (caught
        ;; (= 1 1) is always true, so this single clause handles every error.
        ((= 1 1)
          (begin
            (set! hayes-error (+ hayes-error 1))
            (when (= (len hayes-first-error) 0)
              (set! hayes-first-error (str caught)))
            (dict-set! state "dstack" (list))
            (dict-set! state "rstack" (list))
            (dict-set! state "compiling" false)
            (dict-set! state "current-def" nil)
            (dict-set! state "cstack" (list))
            (dict-set! state "input" (list)))))
      (forth-interpret state src))))
|
||||||
|
|
||||||
|
;; Build the result dict for a run: the three tallies, their sum under
;; "total", and the string form of the first caught error.
(define hayes-summary
  (fn ()
    (dict
      "pass" hayes-pass
      "fail" hayes-fail
      "error" hayes-error
      "total" (+ hayes-pass (+ hayes-fail hayes-error))
      "first-error" hayes-first-error)))
|
||||||
12
lib/forth/scoreboard.json
Normal file
12
lib/forth/scoreboard.json
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
{
|
||||||
|
"source": "gerryjackson/forth2012-test-suite src/core.fr",
|
||||||
|
"generated_at": "2026-04-24T19:13:12Z",
|
||||||
|
"chunks_available": 638,
|
||||||
|
"chunks_fed": 590,
|
||||||
|
"total": 590,
|
||||||
|
"pass": 165,
|
||||||
|
"fail": 0,
|
||||||
|
"error": 425,
|
||||||
|
"percent": 27,
|
||||||
|
"note": "completed"
|
||||||
|
}
|
||||||
30
lib/forth/scoreboard.md
Normal file
30
lib/forth/scoreboard.md
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
# Forth Hayes Core scoreboard
|
||||||
|
|
||||||
|
| metric | value |
|
||||||
|
| ----------------- | ----: |
|
||||||
|
| chunks available | 638 |
|
||||||
|
| chunks fed | 590 |
|
||||||
|
| total | 590 |
|
||||||
|
| pass | 165 |
|
||||||
|
| fail | 0 |
|
||||||
|
| error | 425 |
|
||||||
|
| percent | 27% |
|
||||||
|
|
||||||
|
- **Source**: `gerryjackson/forth2012-test-suite` `src/core.fr`
|
||||||
|
- **Generated**: 2026-04-24T19:13:12Z
|
||||||
|
- **Note**: completed
|
||||||
|
|
||||||
|
A "chunk" is any preprocessed segment ending at a `}T` (every Hayes test
|
||||||
|
is one chunk, plus the small declaration blocks between tests).
|
||||||
|
The runner catches raised errors at chunk boundaries so one bad chunk
|
||||||
|
does not abort the rest. `error` covers chunks that raised; `fail`
|
||||||
|
covers tests whose `->` / `}T` comparison mismatched.
|
||||||
|
|
||||||
|
### Chunk cap
|
||||||
|
|
||||||
|
`conformance.sh` processes the first `$MAX_CHUNKS` chunks (default
|
||||||
|
**590**). Past that, `core.fr` ships tests that rely on unsigned
|
||||||
|
integer wrap-around (e.g. `COUNT-BITS` using `BEGIN DUP WHILE … 2*
|
||||||
|
REPEAT`), which never terminates on our bignum-based interpreter. The
|
||||||
|
cap should rise as those tests unblock — run with `MAX_CHUNKS=639
|
||||||
|
./conformance.sh` once they do.
|
||||||
@@ -74,8 +74,8 @@ Representation:
|
|||||||
- [x] `DO`, `LOOP`, `+LOOP`, `I`, `J`, `LEAVE` — counted loops (needs a return stack)
|
- [x] `DO`, `LOOP`, `+LOOP`, `I`, `J`, `LEAVE` — counted loops (needs a return stack)
|
||||||
- [x] Return stack: `>R`, `R>`, `R@`, `2>R`, `2R>`, `2R@`
|
- [x] Return stack: `>R`, `R>`, `R@`, `2>R`, `2R>`, `2R@`
|
||||||
- [x] Vendor John Hayes' test suite to `lib/forth/ans-tests/`
|
- [x] Vendor John Hayes' test suite to `lib/forth/ans-tests/`
|
||||||
- [ ] `lib/forth/conformance.sh` + runner; `scoreboard.json` + `scoreboard.md`
|
- [x] `lib/forth/conformance.sh` + runner; `scoreboard.json` + `scoreboard.md`
|
||||||
- [ ] Baseline: probably 30-50% Core passing after phase 3
|
- [x] Baseline: probably 30-50% Core passing after phase 3
|
||||||
|
|
||||||
### Phase 4 — strings + more Core
|
### Phase 4 — strings + more Core
|
||||||
- [ ] `S"`, `C"`, `."`, `TYPE`, `COUNT`, `CMOVE`, `FILL`, `BLANK`
|
- [ ] `S"`, `C"`, `."`, `TYPE`, `COUNT`, `CMOVE`, `FILL`, `BLANK`
|
||||||
@@ -99,6 +99,28 @@ Representation:
|
|||||||
|
|
||||||
_Newest first._
|
_Newest first._
|
||||||
|
|
||||||
|
- **Phase 3 — Hayes conformance runner + baseline scoreboard (165/590, 28%).**
|
||||||
|
`lib/forth/conformance.sh` preprocesses `ans-tests/core.fr` (strips `\`
|
||||||
|
and `( ... )` comments + `TESTING` lines), splits the source on every
|
||||||
|
`}T` so each Hayes test plus the small declaration blocks between
|
||||||
|
them are one safe-resume chunk, and emits an SX driver that feeds
|
||||||
|
the chunks through `lib/forth/hayes-runner.sx`. The runner registers
|
||||||
|
`T{`/`->`/`}T` as Forth primitives that snapshot the dstack depth on
|
||||||
|
`T{`, record actual on `->`, compare on `}T`, and install stub
|
||||||
|
`HEX`/`DECIMAL`/`TESTING` so metadata doesn't halt the stream. Errors
|
||||||
|
raised inside a chunk are caught by `guard` and the state is reset,
|
||||||
|
so one bad test does not break the rest. Outputs
|
||||||
|
`scoreboard.json` + `scoreboard.md`.
|
||||||
|
|
||||||
|
First-run baseline: 165 pass / 425 error / 0 fail on the first 590
|
||||||
|
chunks. The default cap sits at 590 because `core.fr` chunks beyond
|
||||||
|
that rely on unsigned-integer wrap-around (e.g. `COUNT-BITS` with
|
||||||
|
`BEGIN DUP WHILE … 2* REPEAT`) which never terminates on our
|
||||||
|
bignum-based Forth; raise `MAX_CHUNKS` once those tests unblock.
|
||||||
|
The majority of errors come from missing Phase-4 words (`RSHIFT`, `LSHIFT`,
|
||||||
|
`CELLS`, `S"`, `CHAR`, `SOURCE`, etc.) — each one implemented should
|
||||||
|
convert a cluster of errors to passes.
|
||||||
|
|
||||||
- **Phase 3 — vendor Gerry Jackson's forth2012-test-suite.** Added
|
- **Phase 3 — vendor Gerry Jackson's forth2012-test-suite.** Added
|
||||||
`lib/forth/ans-tests/{tester.fr, core.fr, coreexttest.fth}` from
|
`lib/forth/ans-tests/{tester.fr, core.fr, coreexttest.fth}` from
|
||||||
https://github.com/gerryjackson/forth2012-test-suite (master, fetched
|
https://github.com/gerryjackson/forth2012-test-suite (master, fetched
|
||||||
|
|||||||
Reference in New Issue
Block a user