diff --git a/lib/perf-smoke.sx b/lib/perf-smoke.sx
new file mode 100644
index 00000000..d3e695c3
--- /dev/null
+++ b/lib/perf-smoke.sx
@@ -0,0 +1,56 @@
+;; lib/perf-smoke.sx — substrate perf smoke test
+;;
+;; Four micro-benchmarks exercising different substrate hot paths. Each
+;; returns its own elapsed-ms, measured via clock-milliseconds. A wrapper
+;; script (scripts/perf-smoke.sh) parses the output and compares to reference
+;; numbers, exiting non-zero when any benchmark exceeds its FACTOR× budget.
+;;
+;; Workloads are chosen for distinct failure modes:
+;;   bench-fib       — function-call dispatch (recursive arithmetic)
+;;   bench-let-chain — env construction (one 8-binding let per iteration × N)
+;;   bench-map-sq    — HO-form dispatch + lambda creation
+;;   bench-tail-loop — TCO + primitive dispatch in tight loop
+
+(define (bench-fib n)
+  (let ((fib (fn (n) (if (< n 2) n (+ (fib (- n 1)) (fib (- n 2)))))))
+    (let ((s (clock-milliseconds)))
+      (fib n)
+      (- (clock-milliseconds) s))))
+
+(define (bench-let-chain iters)
+  (let ((s (clock-milliseconds)))
+    (let loop ((i 0) (acc 0))
+      (if (= i iters)
+        (- (clock-milliseconds) s)
+        (loop
+          (+ i 1)
+          (let ((a 1) (b 2) (c 3) (d 4) (e 5) (f 6) (g 7) (h 8))
+            (+ a b c d e f g h acc)))))))
+
+(define (bench-map-sq n)
+  (let ((s (clock-milliseconds)))
+    (map (fn (x) (* x x)) (range 1 (+ n 1)))
+    (- (clock-milliseconds) s)))
+
+(define (bench-tail-loop iters)
+  (let ((s (clock-milliseconds)))
+    (let loop ((i 0))
+      (if (= i iters)
+        (- (clock-milliseconds) s)
+        (loop (+ i 1))))))
+
+(define (perf-smoke)
+  ;; Warm-up: run each benchmark once at a small size and discard the result,
+  (bench-fib 12)
+  (bench-let-chain 200)
+  (bench-map-sq 100)
+  (bench-tail-loop 500)
+  ;; Timed pass. Sizes tuned for ~50-200 ms each on a quiet machine.
+  (let ((r-fib (bench-fib 18))
+        (r-let (bench-let-chain 1000))
+        (r-map (bench-map-sq 500))
+        (r-tail (bench-tail-loop 5000)))
+    (str "perf-smoke fib18=" r-fib
+         " let1000=" r-let
+         " map500=" r-map
+         " tail5000=" r-tail)))
diff --git a/plans/jit-perf-regression.md b/plans/jit-perf-regression.md
index 325b6c1f..3bb3087e 100644
--- a/plans/jit-perf-regression.md
+++ b/plans/jit-perf-regression.md
@@ -87,9 +87,30 @@ The fix depends on the diagnosed cause; this section is filled in once Phase 3 l
 
 So the next quadratic blow-up doesn't hide behind a watchdog bump:
 
-- [ ] Add a lightweight perf benchmark (`spec/tests/perf-smoke.sx` or similar): a small workload with a known wall-clock target (e.g. 100ms ± 50ms on a reference machine).
-- [ ] Wire it into the `sx_test` MCP tool or the `sx_build` post-step. Failing fast means a 5× slowdown trips an alarm before merge, not a 30× one after.
-- [ ] Document the reference numbers and the machine they were measured on. They will drift; that is fine. The signal is *change*, not absolute number.
+- [x] Add a lightweight perf benchmark — `lib/perf-smoke.sx`. Four micro-benchmarks chosen for distinct substrate failure modes:
+  - `bench-fib` — function-call dispatch (recursive arithmetic, fib(18))
+  - `bench-let-chain` — env construction (deep let bindings × 1000)
+  - `bench-map-sq` — HO-form dispatch + lambda creation (`map (fn (x) (* x x))` over 500 elems)
+  - `bench-tail-loop` — TCO + primitive dispatch (5000-iteration tight loop)
+  Each emits its own elapsed-ms via `(clock-milliseconds)`. A warm-up pass populates JIT cache before the timed pass.
+- [x] Wire it into `scripts/sx-build-all.sh` as a post-step after the JS test suite. Failing the perf budget fails the whole build (hard fail, not log-line).
+- [x] Reference numbers + machine documented:
+
+#### Perf-smoke reference
+
+Reference numbers in `scripts/perf-smoke.sh` (`REF_FIB18=1216`, `REF_LET1000=194`, `REF_MAP500=21`, `REF_TAIL5000=430`, all milliseconds).
+
+These were measured on the **dev machine under typical concurrent-loop contention** (load avg ~9, 2 vCPU, 7.6 GiB RAM, OCaml 5.2.0, architecture HEAD `92f6f187`). They are the **minimum across 6 back-to-back runs**, i.e. closest to the substrate's true speed at that moment; transient contention spikes only inflate timings above this floor.
+
+The default budget multiplier is **5×** (`FACTOR=5`). Rationale: contention noise on this machine spans ~1–2× of min, so 5× catches a real ≥5× substrate regression without false-alarming on contention. Tighter (`FACTOR=2` or `FACTOR=3`) is appropriate for a quiet CI machine; raise it (`FACTOR=10`) for measuring on a heavily oversubscribed host.
+
+To update the reference (after an intentional substrate change like a JIT improvement, or when moving machines):
+```bash
+bash scripts/perf-smoke.sh --update   # rewrites REF_* in scripts/perf-smoke.sh
+```
+Commit the diff with a one-line note explaining what changed.
+
+The signal is *change*, not absolute number. Any single benchmark exceeding its budget fails the build; a genuine substrate regression typically shows up as several benchmarks crossing the 5× line in the same run.
 
 ## Ground rules
 
diff --git a/scripts/perf-smoke.sh b/scripts/perf-smoke.sh
new file mode 100755
index 00000000..44390a45
--- /dev/null
+++ b/scripts/perf-smoke.sh
@@ -0,0 +1,149 @@
+#!/usr/bin/env bash
+# perf-smoke.sh — substrate perf-regression alarm.
+#
+# Runs lib/perf-smoke.sx via sx_server.exe and asserts each micro-benchmark's
+# wall-clock time is within FACTOR× (default 5×) of its reference number.
+#
+# Exit status:
+#   0  all benchmarks within budget (also: --update done, references unset)
+#   1  at least one benchmark exceeded its budget
+#   2  setup error (no repo root, no sx_server.exe, bad FACTOR, bad output)
+#
+# Reference numbers: measured on the dev machine (Linux, 2 vCPU, 7.6 GiB RAM,
+# OCaml 5.2.0). The measurement conditions are recorded in
+# plans/jit-perf-regression.md; keep that section in sync when updating.
+#
+# Usage:
+#   bash scripts/perf-smoke.sh           # check (default factor 5×)
+#   FACTOR=3 bash scripts/perf-smoke.sh  # tighter threshold
+#   bash scripts/perf-smoke.sh --update  # rewrite the reference numbers in
+#                                        # this script with current run's
+#                                        # numbers (use only on a quiet
+#                                        # reference machine; commit the diff)
+#
+# The signal is *change* relative to the reference, not absolute number.
+# Drift is fine; reset the reference when the substrate changes intentionally
+# (e.g. after a JIT improvement).
+
+set -uo pipefail
+
+# Resolve this script's absolute path BEFORE changing directory: --update
+# rewrites the file in place, and a relative $0 would no longer resolve once
+# the script has cd'ed to the repository root.
+SCRIPT_PATH="$(cd "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)/$(basename -- "${BASH_SOURCE[0]}")"
+
+REPO_ROOT=$(git rev-parse --show-toplevel) || {
+  echo "ERROR: not inside a git work tree; cannot locate the repo root." >&2
+  exit 2
+}
+cd "$REPO_ROOT" || exit 2
+
+# ── Reference numbers (provenance: plans/jit-perf-regression.md) ───────────
+# Update these via `bash scripts/perf-smoke.sh --update` on a quiet machine.
+REF_FIB18=1216
+REF_LET1000=194
+REF_MAP500=21
+REF_TAIL5000=430
+# ── End reference numbers ──────────────────────────────────────────────────
+
+FACTOR="${FACTOR:-5}"
+# The budget is computed with shell integer arithmetic, so reject anything
+# that is not a positive integer up front instead of failing mid-verdict.
+if ! [[ "$FACTOR" =~ ^[1-9][0-9]*$ ]]; then
+  echo "ERROR: FACTOR must be a positive integer (got '$FACTOR')." >&2
+  exit 2
+fi
+
+SX_SERVER="${SX_SERVER:-hosts/ocaml/_build/default/bin/sx_server.exe}"
+if [ ! -x "$SX_SERVER" ]; then
+  SX_SERVER="/root/rose-ash/hosts/ocaml/_build/default/bin/sx_server.exe"
+fi
+if [ ! -x "$SX_SERVER" ]; then
+  echo "ERROR: sx_server.exe not found. Run: cd hosts/ocaml && dune build" >&2
+  exit 2
+fi
+
+TMPFILE=$(mktemp) || { echo "ERROR: mktemp failed." >&2; exit 2; }
+# Single quotes: expand $TMPFILE when the trap fires, and keep it quoted.
+trap 'rm -f -- "$TMPFILE"' EXIT
+
+cat > "$TMPFILE" <<'EPOCHS'
+(epoch 1)
+(load "lib/perf-smoke.sx")
+(epoch 2)
+(eval "(perf-smoke)")
+EPOCHS
+
+OUTPUT=$(timeout 60 "$SX_SERVER" < "$TMPFILE" 2>&1)
+LINE=$(echo "$OUTPUT" | grep -E '^"perf-smoke ' | head -1 | tr -d '"')
+
+if [ -z "$LINE" ]; then
+  echo "ERROR: no perf-smoke result line; sx_server output:" >&2
+  echo "$OUTPUT" | tail -20 >&2
+  exit 2
+fi
+
+# Parse: perf-smoke fib18=N let1000=N map500=N tail5000=N
+get() { echo "$LINE" | grep -oE "$1=[0-9]+" | cut -d= -f2; }
+FIB18=$(get fib18)
+LET1000=$(get let1000)
+MAP500=$(get map500)
+TAIL5000=$(get tail5000)
+
+# A malformed result line would otherwise write empty REF_* values on
+# --update or feed empty strings to the integer comparisons below.
+if [ -z "$FIB18" ] || [ -z "$LET1000" ] || [ -z "$MAP500" ] || [ -z "$TAIL5000" ]; then
+  echo "ERROR: could not parse all four timings from: $LINE" >&2
+  exit 2
+fi
+
+if [ "${1:-}" = "--update" ]; then
+  echo "Measured (this run): fib18=$FIB18 let1000=$LET1000 map500=$MAP500 tail5000=$TAIL5000"
+  echo "Rewriting reference numbers in $SCRIPT_PATH…"
+  sed -i \
+    -e "s/^REF_FIB18=.*/REF_FIB18=$FIB18/" \
+    -e "s/^REF_LET1000=.*/REF_LET1000=$LET1000/" \
+    -e "s/^REF_MAP500=.*/REF_MAP500=$MAP500/" \
+    -e "s/^REF_TAIL5000=.*/REF_TAIL5000=$TAIL5000/" \
+    "$SCRIPT_PATH" || { echo "ERROR: failed to rewrite $SCRIPT_PATH" >&2; exit 2; }
+  echo "Done. Commit the diff."
+  exit 0
+fi
+
+if [ "$REF_FIB18" -eq 0 ] || [ "$REF_LET1000" -eq 0 ] || \
+   [ "$REF_MAP500" -eq 0 ] || [ "$REF_TAIL5000" -eq 0 ]; then
+  echo "WARN: reference numbers not yet set (one or more are zero)." >&2
+  echo "Run \`bash scripts/perf-smoke.sh --update\` on a quiet reference machine first." >&2
+  echo "Measured (this run): fib18=$FIB18 let1000=$LET1000 map500=$MAP500 tail5000=$TAIL5000"
+  exit 0
+fi
+
+# verdict NAME GOT_MS REF_MS — print one result row; return 1 if GOT_MS
+# exceeds REF_MS × FACTOR, else 0.
+verdict() {
+  local name="$1" got="$2" ref="$3"
+  local budget=$((ref * FACTOR))
+  if [ "$got" -le "$budget" ]; then
+    printf ' ok %-12s %5d ms (ref %d, %d×)\n' "$name" "$got" "$ref" "$FACTOR"
+    return 0
+  else
+    printf ' FAIL %-12s %5d ms (ref %d, budget %d×=%d ms)\n' \
+      "$name" "$got" "$ref" "$FACTOR" "$budget"
+    return 1
+  fi
+}
+
+FAIL=0
+echo "perf-smoke (factor ${FACTOR}× of reference):"
+verdict fib18 "$FIB18" "$REF_FIB18" || FAIL=1
+verdict let1000 "$LET1000" "$REF_LET1000" || FAIL=1
+verdict map500 "$MAP500" "$REF_MAP500" || FAIL=1
+verdict tail5000 "$TAIL5000" "$REF_TAIL5000" || FAIL=1
+
+if [ "$FAIL" -eq 0 ]; then
+  echo "ok perf-smoke within ${FACTOR}× of reference."
+  exit 0
+else
+  echo "FAIL one or more benchmarks regressed. Investigate before merging."
+  exit 1
+fi
diff --git a/scripts/sx-build-all.sh b/scripts/sx-build-all.sh
index a2d2d0ae..44d574a2 100755
--- a/scripts/sx-build-all.sh
+++ b/scripts/sx-build-all.sh
@@ -43,4 +43,6 @@ echo "=== JS test build ==="
 python3 hosts/javascript/cli.py --extensions continuations --spec-modules types --output shared/static/scripts/sx-full-test.js || { echo "FAIL: test build"; exit 1; }
 echo "=== JS tests ==="
 node hosts/javascript/run_tests.js --full 2>&1 | tail -3 || { echo "FAIL: JS tests"; exit 1; }
+echo "=== perf-smoke ==="
+bash scripts/perf-smoke.sh || { echo "FAIL: perf-smoke (budget exceeded or setup error, see scripts/perf-smoke.sh)"; exit 1; }
 echo "=== All OK ==="