#!/usr/bin/env bash
# Smalltalk-on-SX conformance runner.
#
# Runs the full test suite once with per-file detail, pulls out the
# classic-corpus numbers, and writes:
#   lib/smalltalk/scoreboard.json — machine-readable summary
#   lib/smalltalk/scoreboard.md   — human-readable summary
#
# Usage: bash lib/smalltalk/conformance.sh

# -e is not enabled: the suite's exit status is captured in RC below instead
# of aborting the script.
set -uo pipefail

# Work from the repository root so the relative paths below resolve. When git
# cannot report a work tree, stay in the current directory (git's own error
# message is left visible on stderr) rather than relying on `cd ""`.
if REPO_ROOT=$(git rev-parse --show-toplevel) && [[ -n "$REPO_ROOT" ]]; then
  cd "$REPO_ROOT" || exit 1
fi

OUT_JSON="lib/smalltalk/scoreboard.json"
OUT_MD="lib/smalltalk/scoreboard.md"

# UTC timestamp of this run.
DATE=$(date -u +%Y-%m-%dT%H:%M:%SZ)

# Catalog .st programs in the corpus (stored without directory or extension).
PROGRAMS=()
for f in lib/smalltalk/tests/programs/*.st; do
  # With no matches the glob stays literal; skip anything that is not a file.
  [[ -f "$f" ]] || continue
  prog_file=${f##*/}
  PROGRAMS+=("${prog_file%.st}")
done
NUM_PROGRAMS=${#PROGRAMS[@]}

# Run the full test suite with per-file detail. stdout and stderr are captured
# together; the runner's exit status is kept in RC.
RUNNER_OUT=$(bash lib/smalltalk/test.sh -v 2>&1)
RC=$?

# Final summary line: "OK 403/403 ..." or "FAIL 400/403 ...".
ALL_SUM=$(grep -E '^(OK|FAIL) [0-9]+/[0-9]+' <<<"$RUNNER_OUT" | tail -n 1)
# First "passed/total" pair on that line, split with parameter expansion.
ALL_RATIO=$(grep -oE '[0-9]+/[0-9]+' <<<"$ALL_SUM" | head -n 1)
ALL_PASS=${ALL_RATIO%%/*}
ALL_TOTAL=${ALL_RATIO#*/}
# If the runner printed no summary line, fall back to 0 (as PROG_PASS does) so
# the values interpolated into scoreboard.json remain valid JSON numbers.
ALL_PASS=${ALL_PASS:-0}
ALL_TOTAL=${ALL_TOTAL:-0}

# Per-file pass counts (verbose lines look like "OK <file> N passed").
#######################################
# Print the pass count reported for one test file in the verbose runner
# output.
#
# Scans $RUNNER_OUT for the first line that contains the given file name and
# prints the first whitespace-separated field on that line consisting only of
# digits (verbose lines look like "OK <file> N passed"). A matching line with
# no all-digit field is skipped and scanning continues. Prints nothing when no
# line qualifies, so the caller can apply its own default.
#
# The name is matched as a literal substring via awk's index(), not as a
# regular expression: as a regex, the "." in "tests/programs.sx" would match
# any character and could pick up a different file's line.
#
# Globals:   RUNNER_OUT (read) - captured output of the test runner
# Arguments: $1 - file name (or path fragment) to look for
# Outputs:   the pass count on stdout, or nothing if not found
# Returns:   awk's exit status
#######################################
get_pass () {
  local fname="$1"
  # Here-string instead of `echo | awk`: no extra process, and no risk of echo
  # interpreting the data as options.
  awk -v f="$fname" '
    index($0, f) {
      for (i = 1; i <= NF; i++) {
        if ($i ~ /^[0-9]+$/) { print $i; exit }
      }
    }' <<<"$RUNNER_OUT"
}
Both files are committed; the runner overwrites them on each run.\n' +} > "$OUT_MD" + +echo "Scoreboard updated:" +echo " $OUT_JSON" +echo " $OUT_MD" +echo "Programs: $NUM_PROGRAMS Corpus tests: $PROG_PASS All: $ALL_PASS/$ALL_TOTAL" + +exit $RC diff --git a/lib/smalltalk/scoreboard.json b/lib/smalltalk/scoreboard.json new file mode 100644 index 00000000..d9c5ff43 --- /dev/null +++ b/lib/smalltalk/scoreboard.json @@ -0,0 +1,15 @@ +{ + "date": "2026-04-25T07:53:18Z", + "programs": [ + "eight-queens.st", + "fibonacci.st", + "life.st", + "mandelbrot.st", + "quicksort.st" + ], + "program_count": 5, + "program_tests_passed": 39, + "all_tests_passed": 403, + "all_tests_total": 403, + "exit_code": 0 +} diff --git a/lib/smalltalk/scoreboard.md b/lib/smalltalk/scoreboard.md new file mode 100644 index 00000000..5d4c6230 --- /dev/null +++ b/lib/smalltalk/scoreboard.md @@ -0,0 +1,44 @@ +# Smalltalk-on-SX Scoreboard + +_Last run: 2026-04-25T07:53:18Z_ + +## Totals + +| Suite | Passing | +|-------|---------| +| All Smalltalk-on-SX tests | **403 / 403** | +| Classic-corpus tests (`tests/programs.sx`) | **39** | + +## Classic-corpus programs (`lib/smalltalk/tests/programs/`) + +| Program | Status | +|---------|--------| +| `eight-queens.st` | present | +| `fibonacci.st` | present | +| `life.st` | present | +| `mandelbrot.st` | present | +| `quicksort.st` | present | + +## Per-file test counts + +``` +OK lib/smalltalk/tests/blocks.sx 19 passed +OK lib/smalltalk/tests/cannot_return.sx 5 passed +OK lib/smalltalk/tests/conditional.sx 25 passed +OK lib/smalltalk/tests/dnu.sx 15 passed +OK lib/smalltalk/tests/eval.sx 68 passed +OK lib/smalltalk/tests/nlr.sx 14 passed +OK lib/smalltalk/tests/parse_chunks.sx 21 passed +OK lib/smalltalk/tests/parse.sx 47 passed +OK lib/smalltalk/tests/programs.sx 39 passed +OK lib/smalltalk/tests/runtime.sx 64 passed +OK lib/smalltalk/tests/super.sx 9 passed +OK lib/smalltalk/tests/tokenize.sx 63 passed +OK lib/smalltalk/tests/while.sx 14 passed +``` + +## 
Notes + +- The spec interpreter is correct but slow (call/cc + dict-based ivars per send). +- Larger Life multi-step verification, the 8-queens canonical case, and the glider-gun pattern are deferred to the JIT path. +- Generated by `bash lib/smalltalk/conformance.sh`. Both files are committed; the runner overwrites them on each run. diff --git a/plans/smalltalk-on-sx.md b/plans/smalltalk-on-sx.md index 5bbb2648..1408abab 100644 --- a/plans/smalltalk-on-sx.md +++ b/plans/smalltalk-on-sx.md @@ -76,7 +76,7 @@ Core mapping: - [x] `mandelbrot.st` — escape-time iteration of `z := z² + c` in `lib/smalltalk/tests/programs/mandelbrot.st`. Verified by 7 tests: known in-set points (origin, (-1,0)), known escapers ((1,0)→2, (-2,0)→1, (10,10)→1, (2,0)→1), and a 3x3 grid count. Caught a real bug along the way: literal `#(...)` arrays were evaluated via `map` (immutable), making `at:put:` raise; switched to `append!` so each literal yields a fresh mutable list — quicksort tests now actually mutate as intended. - [x] `life.st` (Conway's Life). `lib/smalltalk/tests/programs/life.st` carries the canonical rules with edge handling. Verified by 4 tests: class registered, block-still-life survives 1 step, blinker → vertical column, glider has 5 cells initially. Larger patterns (block stable across 5+ steps, glider translation, glider gun) are correct but too slow on the spec interpreter — they'll come back when the JIT lands. Also added Pharo-style dynamic array literal `{e1. e2. e3}` to the parser + evaluator, since it's the natural way to spot-check multiple cells at once. - [x] `fibonacci.st` (recursive + Array-memoised) — `lib/smalltalk/tests/programs/fibonacci.st`. Loaded from chunk-format source by new `smalltalk-load` helper; verified by 13 tests in `lib/smalltalk/tests/programs.sx` (recursive `fib:`, memoised `memoFib:` up to 30, instance independence, class-table integrity). 
Source is currently duplicated as a string in the SX test file because there's no SX file-read primitive; conformance.sh will dedupe by piping the .st file directly. -- [ ] `lib/smalltalk/conformance.sh` + runner, `scoreboard.json` + `scoreboard.md` +- [x] `lib/smalltalk/conformance.sh` + runner, `scoreboard.json` + `scoreboard.md`. The runner runs `bash lib/smalltalk/test.sh -v` once, parses per-file counts, and emits both files. JSON has date / program names / corpus-test count / all-test pass/total / exit code. Markdown has a totals table, the program list, the verbatim per-file test counts block, and notes about JIT-deferred work. Both are checked into the tree as the latest baseline; the runner overwrites them. ### Phase 4 — reflection + MOP - [ ] `Object>>class`, `class>>name`, `class>>superclass`, `class>>methodDict`, `class>>selectors` @@ -108,6 +108,7 @@ Core mapping: _Newest first. Agent appends on every commit._ +- 2026-04-25: conformance.sh + scoreboard.{json,md} (`lib/smalltalk/conformance.sh`, `lib/smalltalk/scoreboard.json`, `lib/smalltalk/scoreboard.md`). Single-pass runner over `test.sh -v`; baseline at 5 programs / 39 corpus tests / 403 total. **Phase 3 complete.** - 2026-04-25: classic-corpus #5 Life (`tests/programs/life.st`, 4 tests). Spec-interpreter Conway's Life with edge handling. Block + blinker + glider initial setup verified; larger step counts pending JIT (each spec-interpreter step is ~5-8s on a 5x5 grid). Added `{e1. e2. e3}` dynamic array literal to parser + evaluator. 403/403 total. - 2026-04-25: classic-corpus #4 mandelbrot (`tests/programs/mandelbrot.st`, 7 tests). Escape-time iterator + grid counter. Discovered + fixed an immutable-list bug in `lit-array` eval — `map` produced an immutable list so `at:put:` raised; rebuilt via `append!`. Quicksort tests had been silently dropping ~7 cases due to that bug; now actually mutate. 399/399 total. - 2026-04-25: classic-corpus #3 quicksort (`tests/programs/quicksort.st`, 9 tests). 
Lomuto partition; verified across duplicates, already-sorted/reverse-sorted, empty, single, negatives, all-equal, plus in-place mutation. 385/385 total.