Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 14m2s
Extend the shared driver's MODE=counters with a backward-compatible SUITES format: name:file[:pass-var:fail-var[:extra-preload ...]]. Optional per-suite counter symbols (override the global COUNTERS_PASS/COUNTERS_FAIL) and per-suite preload chains (loaded after the global PRELOADS). Plain name:file entries are unchanged — verified against haskell (fib/sieve/quicksort 2/2/5, matches committed scoreboard). common-lisp has 8 distinct per-suite counter pairs and a different preload chain per suite, so it could not fit the single-counter/fixed-preload model; the extended format expresses it directly. conformance.conf keeps the historical scoreboard schema; conformance.sh becomes the 3-line shim. Result 487/487 (0 fail) vs the old 305/0 baseline — higher and explained: the old per-suite 'timeout 30' was too tight for the slow eval suite (~15-25s under contention), silently recording it as 0; the driver's 180s budget recovers its true 182. geometry/mop-trace stay 0/0 (pre-existing refl-class-chain-depth-with load error; counter vars defined as 0 -> clean gc-result, no fail-fallback). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
246 lines
8.2 KiB
Bash
Executable File
246 lines
8.2 KiB
Bash
Executable File
#!/usr/bin/env bash
# lib/guest/conformance.sh — shared, config-driven conformance driver.
#
# Usage:
#   bash lib/guest/conformance.sh <conf-file>
#
# The conf file is a bash file that sets:
#   LANG_NAME          e.g. prolog
#   PRELOADS=( ... )   .sx files to load before any suite (path from repo root)
#   SUITES=( ... )     colon-separated entries; format depends on MODE
#   MODE               "dict" or "counters" (defaults to "dict")
#   COUNTERS_PASS      (counters mode) global symbol for the pass counter
#   COUNTERS_FAIL      (counters mode) global symbol for the fail counter
#   TIMEOUT_PER_SUITE  (optional, counters mode) seconds per suite, default 120
#   SCOREBOARD_DIR     (optional) defaults to lib/$LANG_NAME
#
# It may override the bash functions emit_scoreboard_json / emit_scoreboard_md
# to produce the per-language scoreboard schema. Defaults are provided.
#
# Suite formats:
#   MODE=dict     — "name:test-file:(runner-fn)"
#                   The runner expression is evaluated and is expected to
#                   return a dict with :passed/:failed/:total.
#   MODE=counters — "name:test-file[:pass-var:fail-var[:extra-preload ...]]"
#                   Each suite is run in a fresh sx_server session: preloads
#                   are loaded, then the test file, then counters are read.
#                   The suite is treated as starting from counters (0, 0).
#                   Optional per-suite fields:
#                     pass-var/fail-var — counter symbols for this suite,
#                                         overriding COUNTERS_PASS/COUNTERS_FAIL.
#                     extra-preload ... — space-separated .sx files loaded
#                                         after the global PRELOADS (per-suite
#                                         dependency chains).
#                   Plain "name:test-file" still works (uses the globals).
#
# Output:
#   Writes $SCOREBOARD_DIR/scoreboard.json and $SCOREBOARD_DIR/scoreboard.md.
#   Exit status:
#     0  every suite is green
#     1  at least one failure was recorded
#     2  usage, configuration, or setup error
#     3  (dict mode) fewer result lines were produced than suites configured

# Treat unset variables and failing pipeline stages as errors. `-e` is not
# enabled, so commands whose failure matters are checked explicitly.
set -uo pipefail

# Paths in the conf file are given relative to the repository root, so run
# from there. Fail loudly if the root cannot be determined or entered instead
# of carrying on in whatever directory the caller happened to be in.
if ! GC_REPO_ROOT=$(git rev-parse --show-toplevel); then
  echo "cannot determine repository root (not inside a git work tree?)" >&2
  exit 2
fi
if ! cd "$GC_REPO_ROOT"; then
  echo "cannot cd to repository root: $GC_REPO_ROOT" >&2
  exit 2
fi

if [ "$#" -lt 1 ]; then
  echo "usage: $0 <conf-file>" >&2
  exit 2
fi

# The conf path is checked after the cd above, i.e. relative to the repo root.
CONF="$1"
if [ ! -f "$CONF" ]; then
  echo "config not found: $CONF" >&2
  exit 2
fi

# Defaults — the conf file may override these.
LANG_NAME=
PRELOADS=()
SUITES=()
MODE=dict
COUNTERS_PASS=
COUNTERS_FAIL=
TIMEOUT_PER_SUITE=120
SCOREBOARD_DIR=

# Escape a string so it can be embedded inside a JSON string literal.
# Handles backslash, double quote, newline, carriage return and tab.
# Arguments: $1 - raw string
# Outputs:   the escaped string on stdout (no trailing newline)
_gc_json_escape() {
  local s=$1
  s=${s//\\/\\\\} # backslashes first, so later escapes are not doubled
  s=${s//\"/\\\"}
  s=${s//$'\n'/\\n}
  s=${s//$'\r'/\\r}
  s=${s//$'\t'/\\t}
  printf '%s' "$s"
}

# Default JSON scoreboard emitter (generic schema). Per-language configs may
# redefine this function to keep byte-equality with historical scoreboards.
# Globals (read): LANG_NAME, GC_NAMES, GC_PASS, GC_FAIL, GC_TOTAL_S,
#                 GC_TOTAL_PASS, GC_TOTAL_FAIL, GC_TOTAL
# Outputs:        the JSON document on stdout
emit_scoreboard_json() {
  local n=${#GC_NAMES[@]} i sep
  printf '{\n'
  # Names are escaped so a quote or backslash cannot produce invalid JSON.
  printf ' "lang": "%s",\n' "$(_gc_json_escape "$LANG_NAME")"
  printf ' "total_passed": %d,\n' "$GC_TOTAL_PASS"
  printf ' "total_failed": %d,\n' "$GC_TOTAL_FAIL"
  printf ' "total": %d,\n' "$GC_TOTAL"
  printf ' "suites": ['
  for ((i = 0; i < n; i++)); do
    # Every element but the last is followed by a comma.
    sep=","
    if ((i == n - 1)); then
      sep=""
    fi
    printf '\n {"name":"%s","passed":%d,"failed":%d,"total":%d}%s' \
      "$(_gc_json_escape "${GC_NAMES[$i]}")" \
      "${GC_PASS[$i]}" "${GC_FAIL[$i]}" "${GC_TOTAL_S[$i]}" "$sep"
  done
  printf '\n ],\n'
  # `date -Iseconds` is not available everywhere; fall back to plain `date`.
  printf ' "generated": "%s"\n' "$(date -Iseconds 2>/dev/null || date)"
  printf '}\n'
}

# Default Markdown scoreboard emitter: a heading, a one-line summary and a
# table with one row per suite. Per-language configs may redefine it.
# Globals (read): LANG_NAME, GC_NAMES, GC_PASS, GC_FAIL, GC_TOTAL_S,
#                 GC_TOTAL_PASS, GC_TOTAL_FAIL, GC_TOTAL
# Outputs:        the Markdown document on stdout
emit_scoreboard_md() {
  local n=${#GC_NAMES[@]} i status name
  printf '# %s scoreboard\n\n' "$LANG_NAME"
  printf '**%d / %d passing** (%d failure(s)).\n\n' "$GC_TOTAL_PASS" "$GC_TOTAL" "$GC_TOTAL_FAIL"
  printf '| Suite | Passed | Total | Status |\n'
  printf '|-------|--------|-------|--------|\n'
  for ((i = 0; i < n; i++)); do
    # A suite is flagged FAIL as soon as it has at least one failure.
    status="ok"
    if [ "${GC_FAIL[$i]}" -gt 0 ]; then
      status="FAIL"
    fi
    # A literal '|' in a suite name would split the table cell; escape it.
    name=${GC_NAMES[$i]}
    name=${name//'|'/\\|}
    printf '| %s | %d | %d | %s |\n' \
      "$name" "${GC_PASS[$i]}" "${GC_TOTAL_S[$i]}" "$status"
  done
}

# Load the per-language configuration. It is sourced after the defaults and
# the default emit_scoreboard_* functions are defined, so it can override any
# of them.
# shellcheck disable=SC1090
source "$CONF"

if [ -z "$LANG_NAME" ]; then
  echo "LANG_NAME not set in $CONF" >&2
  exit 2
fi
SCOREBOARD_DIR="${SCOREBOARD_DIR:-lib/$LANG_NAME}"

# Locate the sx_server binary: $SX_SERVER wins; otherwise use the default
# build output path relative to the current directory.
SX="${SX_SERVER:-hosts/ocaml/_build/default/bin/sx_server.exe}"
if [ ! -x "$SX" ]; then
  # Not executable here — try the same relative path under the first worktree
  # git lists. The porcelain format prints "worktree <path>" on its first
  # line, which keeps paths containing spaces intact (splitting the
  # human-readable listing on whitespace would truncate them).
  MAIN_ROOT=$(git worktree list --porcelain 2>/dev/null | sed -n '1s/^worktree //p')
  if [ -n "${MAIN_ROOT:-}" ] && [ -x "$MAIN_ROOT/$SX" ]; then
    SX="$MAIN_ROOT/$SX"
  else
    echo "ERROR: sx_server.exe not found (set SX_SERVER to override)." >&2
    exit 2
  fi
fi

# Parallel arrays, one slot per suite result: name, passed, failed, total.
GC_NAMES=()
GC_PASS=()
GC_FAIL=()
GC_TOTAL_S=()

# Parse one output line of the form
#   (gc-result "name" P F T)
# and, on a match, append name / passed / failed / total to the result arrays.
# Globals (written): GC_NAMES, GC_PASS, GC_FAIL, GC_TOTAL_S
# Arguments: $1 - a single line of sx_server output
# Returns:   0 if the line matched and was recorded, 1 otherwise (arrays are
#            left untouched)
parse_result_line() {
  local line=$1
  local re='^\(gc-result "([^"]+)" ([0-9]+) ([0-9]+) ([0-9]+)\)$'
  if [[ "$line" =~ $re ]]; then
    GC_NAMES+=("${BASH_REMATCH[1]}")
    # Force base 10: a zero-padded count such as "08" would otherwise be
    # rejected as invalid octal by later $(( )) arithmetic and printf %d.
    GC_PASS+=("$((10#${BASH_REMATCH[2]}))")
    GC_FAIL+=("$((10#${BASH_REMATCH[3]}))")
    GC_TOTAL_S+=("$((10#${BASH_REMATCH[4]}))")
    return 0
  fi
  return 1
}

# Run the configured suites and fill GC_NAMES / GC_PASS / GC_FAIL / GC_TOTAL_S
# through parse_result_line. Array expansions use the ${a[@]+"${a[@]}"} form so
# that empty PRELOADS/SUITES do not trip `set -u` on bash older than 4.4.
case "$MODE" in
  dict)
    # All suites share one sx_server session: load the preloads, the
    # conformance helpers and every test file, then evaluate one
    # gc-dict-result expression per suite.
    SCRIPT=$'(epoch 1)\n'
    for f in ${PRELOADS[@]+"${PRELOADS[@]}"}; do
      SCRIPT+="(load \"$f\")"$'\n'
    done
    SCRIPT+=$'(load "lib/guest/conformance.sx")\n'
    for entry in ${SUITES[@]+"${SUITES[@]}"}; do
      # Entry format: name:test-file:(runner-fn) — only the file is needed here.
      IFS=: read -r _ file _ <<< "$entry"
      SCRIPT+="(load \"$file\")"$'\n'
    done
    SCRIPT+=$'(epoch 2)\n'
    for entry in ${SUITES[@]+"${SUITES[@]}"}; do
      # The third field takes the rest of the entry, so the runner expression
      # may itself contain colons.
      IFS=: read -r name _ runner <<< "$entry"
      SCRIPT+="(eval \"(gc-dict-result \\\"$name\\\" $runner)\")"$'\n'
    done
    OUTPUT=$(printf '%s' "$SCRIPT" | "$SX" 2>&1)

    # Exactly one (gc-result ...) line is expected per suite; anything else
    # means a load or evaluation went wrong, so dump the raw output and stop.
    expected=${#SUITES[@]}
    matched=0
    while IFS= read -r line; do
      if parse_result_line "$line"; then
        matched=$((matched + 1))
      fi
    done <<< "$OUTPUT"
    if [ "$matched" -ne "$expected" ]; then
      echo "Expected $expected suite results, got $matched" >&2
      echo "---- raw output ----" >&2
      printf '%s\n' "$OUTPUT" >&2
      exit 3
    fi
    ;;

  counters)
    # Each suite must resolve to a pass/fail counter name — either per-suite
    # (fields 3 & 4 of the SUITES entry) or via the global COUNTERS_PASS /
    # COUNTERS_FAIL defaults. Validate up front so a misconfigured suite fails
    # loudly instead of silently recording a 0/1.
    for entry in ${SUITES[@]+"${SUITES[@]}"}; do
      IFS=: read -r _sname _sfile _spass _sfail _spre <<< "$entry"
      if [ -z "${_spass:-$COUNTERS_PASS}" ] || [ -z "${_sfail:-$COUNTERS_FAIL}" ]; then
        echo "MODE=counters: suite '${_sname}' has no counter names and COUNTERS_PASS/COUNTERS_FAIL are unset in $CONF" >&2
        exit 2
      fi
    done

    for entry in ${SUITES[@]+"${SUITES[@]}"}; do
      # Format: name:file[:pass-var:fail-var[:extra-preload ...]]
      #   pass-var / fail-var — per-suite counter symbols (default: the global
      #                         COUNTERS_PASS / COUNTERS_FAIL).
      #   extra-preload ...   — space-separated .sx files loaded after the
      #                         global PRELOADS and before the test file. Lets
      #                         each suite bring its own dependency chain.
      IFS=: read -r name file spass sfail spre <<< "$entry"
      cpass="${spass:-$COUNTERS_PASS}"
      cfail="${sfail:-$COUNTERS_FAIL}"

      # Split the per-suite preload list on whitespace into an array. Unlike
      # an unquoted `for f in $spre`, this does not glob-expand the names.
      extra_preloads=()
      read -r -a extra_preloads <<< "$spre"

      if ! TMPFILE=$(mktemp); then
        echo "mktemp failed while preparing suite '$name'" >&2
        exit 2
      fi
      {
        printf '(epoch 1)\n'
        for f in ${PRELOADS[@]+"${PRELOADS[@]}"}; do
          printf '(load "%s")\n' "$f"
        done
        for f in ${extra_preloads[@]+"${extra_preloads[@]}"}; do
          printf '(load "%s")\n' "$f"
        done
        printf '(load "lib/guest/conformance.sx")\n'
        printf '(epoch 2)\n'
        printf '(load "%s")\n' "$file"
        printf '(epoch 3)\n'
        # The suite is treated as starting from counters (0, 0).
        printf '(eval "(gc-counters-result \\"%s\\" 0 0 %s %s)")\n' \
          "$name" "$cpass" "$cfail"
      } > "$TMPFILE"

      # Fresh sx_server session per suite, bounded by TIMEOUT_PER_SUITE. Keep
      # the exit status so a timeout can be told apart from a crash below.
      OUTPUT=$(timeout "$TIMEOUT_PER_SUITE" "$SX" < "$TMPFILE" 2>&1)
      sx_status=$?
      rm -f -- "$TMPFILE"

      # Only the last (gc-result ...) line counts; `|| true` keeps a grep
      # no-match from being treated as a pipeline failure.
      result=$(printf '%s\n' "$OUTPUT" | grep -E '^\(gc-result ' | tail -1 || true)
      if [ -z "$result" ] || ! parse_result_line "$result"; then
        # Suite hung or crashed before emitting a result. Record 0/1 so it
        # shows up as a failure rather than vanishing, and say why on stderr.
        if [ "$sx_status" -eq 124 ]; then
          echo "suite '$name': timed out after ${TIMEOUT_PER_SUITE}s; recording 0/1" >&2
        else
          echo "suite '$name': no gc-result line (sx_server exit status $sx_status); recording 0/1" >&2
        fi
        GC_NAMES+=("$name")
        GC_PASS+=(0)
        GC_FAIL+=(1)
        GC_TOTAL_S+=(1)
      fi
    done
    ;;

  *)
    echo "Unknown MODE=$MODE in $CONF (expected dict|counters)" >&2
    exit 2
    ;;
esac

# Sum the per-suite results into the grand totals read by the emitters.
GC_TOTAL_PASS=0
GC_TOTAL_FAIL=0
GC_TOTAL=0
for ((i = 0; i < ${#GC_NAMES[@]}; i++)); do
  GC_TOTAL_PASS=$((GC_TOTAL_PASS + GC_PASS[i]))
  GC_TOTAL_FAIL=$((GC_TOTAL_FAIL + GC_FAIL[i]))
  GC_TOTAL=$((GC_TOTAL + GC_TOTAL_S[i]))
done

# Make sure the scoreboard files can actually be written before reporting
# anything: otherwise the run could print "All N tests pass." and exit 0
# without having produced a scoreboard.
if ! mkdir -p "$SCOREBOARD_DIR"; then
  echo "cannot create scoreboard dir: $SCOREBOARD_DIR" >&2
  exit 2
fi
for gc_out in "$SCOREBOARD_DIR/scoreboard.json" "$SCOREBOARD_DIR/scoreboard.md"; do
  # Probe the redirection on its own so the check does not depend on the
  # exit status of a (possibly conf-overridden) emit function.
  if ! true > "$gc_out"; then
    echo "cannot write scoreboard file: $gc_out" >&2
    exit 2
  fi
done
emit_scoreboard_json > "$SCOREBOARD_DIR/scoreboard.json"
emit_scoreboard_md > "$SCOREBOARD_DIR/scoreboard.md"

# Exit status mirrors the scoreboard: 1 if any failure was recorded.
if [ "$GC_TOTAL_FAIL" -gt 0 ]; then
  echo "$GC_TOTAL_FAIL failure(s) across $GC_TOTAL tests" >&2
  exit 1
fi
echo "All $GC_TOTAL tests pass."