#!/bin/bash
# test-suite-baseline.sh — W14/F10: make FAIL mean something again.
#
# The review (conformance.md F-10): the OCaml suite is not green — a
# permanent ~274-failure band (in-progress hs-* + r7rs radix shadow) is
# normalized, so real regressions hide inside the red noise and nobody can
# tell a new failure from the band.
#
# This gate pins the band instead of ignoring it: the full suite's FAIL
# set is diffed against the checked-in baseline
# (spec/tests/known-failures.txt). Two red conditions, both loud:
#   NEW failure      -> a real regression: fix it (or, if intentional,
#                       justify + add to the baseline in the same commit)
#   VANISHED failure -> something got fixed: delete it from the baseline
#                       so the win is locked in
# Neither touches the runner or the hs loops' scoreboards — the band still
# prints as FAIL lines for the teams working through it.
#
# Usage:   bash scripts/test-suite-baseline.sh
# Runtime: full suite, ~5–15 min (hard cap: 3000 s, enforced by timeout).
# Exit:    0 = fail set identical to baseline
#          1 = red: new and/or vanished failures, or the runner exited
#              with anything other than 0/1 (timeout/crash)
#          2 = skipped: runner not built, baseline missing, or setup failed

# Deliberately no `-e`: the runner's non-zero exit status and grep's
# "no lines matched" status are expected and are inspected explicitly below.
set -uo pipefail

# Every path below is relative to the repository root (the parent of this
# script's directory). Without -e a failed cd would otherwise go unnoticed.
cd "$(dirname "$0")/.." || { echo "SKIP: cannot cd to repo root" >&2; exit 2; }

RUNNER=hosts/ocaml/_build/default/bin/run_tests.exe
BASELINE=spec/tests/known-failures.txt
[[ -x "$RUNNER" ]] || { echo "SKIP: $RUNNER not built" >&2; exit 2; }
[[ -f "$BASELINE" ]] || { echo "SKIP: $BASELINE missing" >&2; exit 2; }

# One private scratch directory holds every intermediate file; the EXIT trap
# removes it whenever the script exits, including the early RED exit below.
workdir=$(mktemp -d) || { echo "SKIP: mktemp failed" >&2; exit 2; }
trap 'rm -rf -- "$workdir"' EXIT
log=$workdir/runner.log
current=$workdir/current.txt
expected=$workdir/expected.txt

timeout 3000 "$RUNNER" > "$log" 2>&1
rc=$?
# Only 0 and 1 are accepted as a completed run (presumably "all passed" and
# "some tests failed" — confirm against the runner). Anything else, e.g.
# timeout's own status when the cap is hit, means the log cannot be trusted.
if [[ $rc -ne 0 && $rc -ne 1 ]]; then
  echo "RED: runner exited $rc (timeout/crash)"
  tail -5 "$log"
  exit 1
fi

# Normalize: keep the stable test identity (suite > name), drop messages
# (error text may contain addresses/timings that churn).
# NOTE(review): the grep/sed patterns are reproduced exactly as they appear
# in the reviewed source — confirm their leading whitespace matches the
# runner's actual FAIL lines.
# LC_ALL=C makes sort/comm compare bytes, so the result does not depend on
# the caller's locale and distinct names are never merged by collation rules.
grep '^ FAIL: ' "$log" \
  | sed 's/^ FAIL: //; s/: .*$//' \
  | LC_ALL=C sort -u > "$current"
LC_ALL=C sort -u "$BASELINE" > "$expected"

# comm -13: lines only in the current run; comm -23: lines only in the baseline.
new_failures=$(LC_ALL=C comm -13 "$expected" "$current")
vanished=$(LC_ALL=C comm -23 "$expected" "$current")
summary=$(grep '^Results:' "$log" | tail -1)

red=0
if [[ -n "$new_failures" ]]; then
  echo "RED: NEW failures not in baseline:"
  sed 's/^/ + /' <<<"$new_failures"
  red=1
fi
if [[ -n "$vanished" ]]; then
  echo "RED: baseline entries now PASSING (delete them from $BASELINE):"
  sed 's/^/ - /' <<<"$vanished"
  red=1
fi
if [[ $red -eq 0 ]]; then
  # Count the normalized (sorted, de-duplicated) baseline — the set that was
  # actually compared — rather than raw lines of the checked-in file.
  echo "GREEN: fail set identical to baseline ($(grep -c '' "$expected") known failures)"
fi
echo "$summary"
exit $red