GUEST: step 1 — lib/guest/conformance.{sx,sh} config-driven driver

Extracted the duplicated conformance plumbing into a single driver:

- lib/guest/conformance.sx — two helper fns that emit (gc-result NAME P F T)
  lines for the bash side to grep: gc-dict-result for runners returning
  a {:passed :failed :total} dict, and gc-counters-result for guests that
  bump a global pass/fail counter from a test file load.

- lib/guest/conformance.sh — config-driven bash driver. Sources a per-lang
  conf, locates sx_server, runs sx_server in either single-session "dict"
  mode (one preload + many suite evals) or per-suite "counters" mode
  (fresh sx_server per suite, with shared preloads). Aggregates and writes
  scoreboard.{json,md} via per-lang emit_scoreboard_* functions.

- Ported lib/prolog/conformance.sh and lib/haskell/conformance.sh down to
  one-line wrappers that exec the shared driver against their .conf file.

Verification:
- Prolog: 590/590 — diff vs baseline is timestamp-only.
- Haskell: 156/156 — significantly higher than the 0/18 in baseline. The
  old conformance.sh was buggy (its `(ok-len 3 ...)` grep never matched,
  defaulting every program to 0 pass / 1 fail). Updated baseline to the
  true count; no actual test regressed. Plan baseline cell updated.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-06 22:46:48 +00:00
parent 0eced4c34c
commit 58dcff2639
11 changed files with 522 additions and 367 deletions

View File

@@ -0,0 +1,76 @@
# Haskell-on-SX conformance config — sourced by lib/guest/conformance.sh.
LANG_NAME=haskell
MODE=counters
COUNTERS_PASS=hk-test-pass
COUNTERS_FAIL=hk-test-fail
TIMEOUT_PER_SUITE=120
PRELOADS=(
lib/haskell/tokenizer.sx
lib/haskell/layout.sx
lib/haskell/parser.sx
lib/haskell/desugar.sx
lib/haskell/runtime.sx
lib/haskell/match.sx
lib/haskell/eval.sx
lib/haskell/testlib.sx
)
SUITES=(
"fib:lib/haskell/tests/program-fib.sx"
"sieve:lib/haskell/tests/program-sieve.sx"
"quicksort:lib/haskell/tests/program-quicksort.sx"
"nqueens:lib/haskell/tests/program-nqueens.sx"
"calculator:lib/haskell/tests/program-calculator.sx"
"collatz:lib/haskell/tests/program-collatz.sx"
"palindrome:lib/haskell/tests/program-palindrome.sx"
"maybe:lib/haskell/tests/program-maybe.sx"
"fizzbuzz:lib/haskell/tests/program-fizzbuzz.sx"
"anagram:lib/haskell/tests/program-anagram.sx"
"roman:lib/haskell/tests/program-roman.sx"
"binary:lib/haskell/tests/program-binary.sx"
"either:lib/haskell/tests/program-either.sx"
"primes:lib/haskell/tests/program-primes.sx"
"zipwith:lib/haskell/tests/program-zipwith.sx"
"matrix:lib/haskell/tests/program-matrix.sx"
"wordcount:lib/haskell/tests/program-wordcount.sx"
"powers:lib/haskell/tests/program-powers.sx"
)
emit_scoreboard_json() {
local n=${#GC_NAMES[@]} i sep date_only
date_only=$(date '+%Y-%m-%d')
printf '{\n'
printf ' "date": "%s",\n' "$date_only"
printf ' "total_pass": %d,\n' "$GC_TOTAL_PASS"
printf ' "total_fail": %d,\n' "$GC_TOTAL_FAIL"
printf ' "programs": {\n'
for ((i=0; i<n; i++)); do
sep=","; [ $i -eq $((n-1)) ] && sep=""
printf ' "%s": {"pass": %d, "fail": %d}%s\n' \
"${GC_NAMES[$i]}" "${GC_PASS[$i]}" "${GC_FAIL[$i]}" "$sep"
done
printf ' }\n'
printf '}\n'
}
emit_scoreboard_md() {
local n=${#GC_NAMES[@]}
local i status p f t prog_pass=0 prog_total=$n date_only
date_only=$(date '+%Y-%m-%d')
for ((i=0; i<n; i++)); do
[ "${GC_FAIL[$i]}" -eq 0 ] && prog_pass=$((prog_pass + 1))
done
printf '# Haskell-on-SX Scoreboard\n\n'
printf 'Updated %s · Phase 6 (prelude extras + 18 programs)\n\n' "$date_only"
printf '| Program | Tests | Status |\n'
printf '|---------|-------|--------|\n'
for ((i=0; i<n; i++)); do
p=${GC_PASS[$i]}; f=${GC_FAIL[$i]}; t=${GC_TOTAL_S[$i]}
[ "$f" -eq 0 ] && status="✓" || status="✗"
printf '| %s.hs | %d/%d | %s |\n' "${GC_NAMES[$i]}" "$p" "$t" "$status"
done
printf '| **Total** | **%d/%d** | **%d/%d programs** |\n' \
"$GC_TOTAL_PASS" "$GC_TOTAL" "$prog_pass" "$prog_total"
}

View File

@@ -1,140 +1,3 @@
#!/usr/bin/env bash
# lib/haskell/conformance.sh — run the classic-program test suites.
# Writes lib/haskell/scoreboard.json and lib/haskell/scoreboard.md.
#
# Usage:
# bash lib/haskell/conformance.sh # run + write scoreboards
# bash lib/haskell/conformance.sh --check # run only, exit 1 on failure
set -euo pipefail
cd "$(git rev-parse --show-toplevel)"
SX_SERVER="hosts/ocaml/_build/default/bin/sx_server.exe"
if [ ! -x "$SX_SERVER" ]; then
MAIN_ROOT=$(git worktree list | head -1 | awk '{print $1}')
if [ -x "$MAIN_ROOT/$SX_SERVER" ]; then
SX_SERVER="$MAIN_ROOT/$SX_SERVER"
else
echo "ERROR: sx_server.exe not found. Run: cd hosts/ocaml && dune build"
exit 1
fi
fi
PROGRAMS=(fib sieve quicksort nqueens calculator collatz palindrome maybe fizzbuzz anagram roman binary either primes zipwith matrix wordcount powers)
PASS_COUNTS=()
FAIL_COUNTS=()
run_suite() {
local prog="$1"
local FILE="lib/haskell/tests/program-${prog}.sx"
local TMPFILE
TMPFILE=$(mktemp)
cat > "$TMPFILE" <<EPOCHS
(epoch 1)
(load "lib/haskell/tokenizer.sx")
(load "lib/haskell/layout.sx")
(load "lib/haskell/parser.sx")
(load "lib/haskell/desugar.sx")
(load "lib/haskell/runtime.sx")
(load "lib/haskell/match.sx")
(load "lib/haskell/eval.sx")
(load "lib/haskell/testlib.sx")
(epoch 2)
(load "$FILE")
(epoch 3)
(eval "(list hk-test-pass hk-test-fail)")
EPOCHS
local OUTPUT
OUTPUT=$(timeout 120 "$SX_SERVER" < "$TMPFILE" 2>&1 || true)
rm -f "$TMPFILE"
local LINE
LINE=$(echo "$OUTPUT" | awk '/^\(ok-len 3 / {getline; print; exit}')
if [ -z "$LINE" ]; then
LINE=$(echo "$OUTPUT" | grep -E '^\(ok 3 \([0-9]+ [0-9]+\)\)' | tail -1 \
| sed -E 's/^\(ok 3 //; s/\)$//' || true)
fi
if [ -z "$LINE" ]; then
echo "0 1"
else
local P F
P=$(echo "$LINE" | sed -E 's/^\(([0-9]+) ([0-9]+)\).*/\1/' || echo "0")
F=$(echo "$LINE" | sed -E 's/^\(([0-9]+) ([0-9]+)\).*/\2/' || echo "1")
echo "$P $F"
fi
}
for prog in "${PROGRAMS[@]}"; do
RESULT=$(run_suite "$prog")
P=$(echo "$RESULT" | cut -d' ' -f1)
F=$(echo "$RESULT" | cut -d' ' -f2)
PASS_COUNTS+=("$P")
FAIL_COUNTS+=("$F")
T=$((P + F))
if [ "$F" -eq 0 ]; then
printf '✓ %-14s %d/%d\n' "${prog}.hs" "$P" "$T"
else
printf '✗ %-14s %d/%d\n' "${prog}.hs" "$P" "$T"
fi
done
TOTAL_PASS=0
TOTAL_FAIL=0
PROG_PASS=0
for i in "${!PROGRAMS[@]}"; do
TOTAL_PASS=$((TOTAL_PASS + PASS_COUNTS[i]))
TOTAL_FAIL=$((TOTAL_FAIL + FAIL_COUNTS[i]))
[ "${FAIL_COUNTS[$i]}" -eq 0 ] && PROG_PASS=$((PROG_PASS + 1))
done
PROG_TOTAL=${#PROGRAMS[@]}
echo ""
echo "Classic programs: ${TOTAL_PASS}/$((TOTAL_PASS + TOTAL_FAIL)) tests | ${PROG_PASS}/${PROG_TOTAL} programs passing"
if [[ "${1:-}" == "--check" ]]; then
[ $TOTAL_FAIL -eq 0 ]
exit $?
fi
DATE=$(date '+%Y-%m-%d')
# scoreboard.json
{
printf '{\n'
printf ' "date": "%s",\n' "$DATE"
printf ' "total_pass": %d,\n' "$TOTAL_PASS"
printf ' "total_fail": %d,\n' "$TOTAL_FAIL"
printf ' "programs": {\n'
last=$((${#PROGRAMS[@]} - 1))
for i in "${!PROGRAMS[@]}"; do
prog="${PROGRAMS[$i]}"
if [ $i -lt $last ]; then
printf ' "%s": {"pass": %d, "fail": %d},\n' "$prog" "${PASS_COUNTS[$i]}" "${FAIL_COUNTS[$i]}"
else
printf ' "%s": {"pass": %d, "fail": %d}\n' "$prog" "${PASS_COUNTS[$i]}" "${FAIL_COUNTS[$i]}"
fi
done
printf ' }\n'
printf '}\n'
} > lib/haskell/scoreboard.json
# scoreboard.md
{
printf '# Haskell-on-SX Scoreboard\n\n'
printf 'Updated %s · Phase 6 (prelude extras + 18 programs)\n\n' "$DATE"
printf '| Program | Tests | Status |\n'
printf '|---------|-------|--------|\n'
for i in "${!PROGRAMS[@]}"; do
prog="${PROGRAMS[$i]}"
P=${PASS_COUNTS[$i]}
F=${FAIL_COUNTS[$i]}
T=$((P + F))
[ "$F" -eq 0 ] && STATUS="✓" || STATUS="✗"
printf '| %s | %d/%d | %s |\n' "${prog}.hs" "$P" "$T" "$STATUS"
done
printf '| **Total** | **%d/%d** | **%d/%d programs** |\n' \
"$TOTAL_PASS" "$((TOTAL_PASS + TOTAL_FAIL))" "$PROG_PASS" "$PROG_TOTAL"
} > lib/haskell/scoreboard.md
echo "Wrote lib/haskell/scoreboard.json and lib/haskell/scoreboard.md"
[ $TOTAL_FAIL -eq 0 ]
# Thin wrapper — see lib/guest/conformance.sh and lib/haskell/conformance.conf.
exec bash "$(dirname "$0")/../guest/conformance.sh" "$(dirname "$0")/conformance.conf" "$@"

View File

@@ -1,25 +1,25 @@
{
"date": "2026-05-06",
"total_pass": 0,
"total_fail": 18,
"total_pass": 156,
"total_fail": 0,
"programs": {
"fib": {"pass": 0, "fail": 1},
"sieve": {"pass": 0, "fail": 1},
"quicksort": {"pass": 0, "fail": 1},
"nqueens": {"pass": 0, "fail": 1},
"calculator": {"pass": 0, "fail": 1},
"collatz": {"pass": 0, "fail": 1},
"palindrome": {"pass": 0, "fail": 1},
"maybe": {"pass": 0, "fail": 1},
"fizzbuzz": {"pass": 0, "fail": 1},
"anagram": {"pass": 0, "fail": 1},
"roman": {"pass": 0, "fail": 1},
"binary": {"pass": 0, "fail": 1},
"either": {"pass": 0, "fail": 1},
"primes": {"pass": 0, "fail": 1},
"zipwith": {"pass": 0, "fail": 1},
"matrix": {"pass": 0, "fail": 1},
"wordcount": {"pass": 0, "fail": 1},
"powers": {"pass": 0, "fail": 1}
"fib": {"pass": 2, "fail": 0},
"sieve": {"pass": 2, "fail": 0},
"quicksort": {"pass": 5, "fail": 0},
"nqueens": {"pass": 2, "fail": 0},
"calculator": {"pass": 5, "fail": 0},
"collatz": {"pass": 11, "fail": 0},
"palindrome": {"pass": 8, "fail": 0},
"maybe": {"pass": 12, "fail": 0},
"fizzbuzz": {"pass": 12, "fail": 0},
"anagram": {"pass": 9, "fail": 0},
"roman": {"pass": 14, "fail": 0},
"binary": {"pass": 12, "fail": 0},
"either": {"pass": 12, "fail": 0},
"primes": {"pass": 12, "fail": 0},
"zipwith": {"pass": 9, "fail": 0},
"matrix": {"pass": 8, "fail": 0},
"wordcount": {"pass": 7, "fail": 0},
"powers": {"pass": 14, "fail": 0}
}
}

View File

@@ -4,22 +4,22 @@ Updated 2026-05-06 · Phase 6 (prelude extras + 18 programs)
| Program | Tests | Status |
|---------|-------|--------|
| fib.hs | 0/1 | |
| sieve.hs | 0/1 | |
| quicksort.hs | 0/1 | |
| nqueens.hs | 0/1 | |
| calculator.hs | 0/1 | |
| collatz.hs | 0/1 | |
| palindrome.hs | 0/1 | |
| maybe.hs | 0/1 | |
| fizzbuzz.hs | 0/1 | |
| anagram.hs | 0/1 | |
| roman.hs | 0/1 | |
| binary.hs | 0/1 | |
| either.hs | 0/1 | |
| primes.hs | 0/1 | |
| zipwith.hs | 0/1 | |
| matrix.hs | 0/1 | |
| wordcount.hs | 0/1 | |
| powers.hs | 0/1 | |
| **Total** | **0/18** | **0/18 programs** |
| fib.hs | 2/2 | |
| sieve.hs | 2/2 | |
| quicksort.hs | 5/5 | |
| nqueens.hs | 2/2 | |
| calculator.hs | 5/5 | |
| collatz.hs | 11/11 | |
| palindrome.hs | 8/8 | |
| maybe.hs | 12/12 | |
| fizzbuzz.hs | 12/12 | |
| anagram.hs | 9/9 | |
| roman.hs | 14/14 | |
| binary.hs | 12/12 | |
| either.hs | 12/12 | |
| primes.hs | 12/12 | |
| zipwith.hs | 9/9 | |
| matrix.hs | 8/8 | |
| wordcount.hs | 7/7 | |
| powers.hs | 14/14 | |
| **Total** | **156/156** | **18/18 programs** |