Files
rose-ash/lib/agentic/conformance.sh
giles 88c4963fd0 agentic-sx Phase 2: branch — one branch = one agent (TDD)
space handle (repo + relations Datalog db); spawn! = branch-from-briefing
with a genesis spawn commit at the fork point; commit! verb snapshots a full
worktree VALUE into a typed agent-commit and CAS-advances the branch (no
shared index — multi-agent safe). Topology: fork-point via merge-base,
agents from refs, typed edges sub-agent-of/reviews/merges. Session merges
always record a two-parent session-merge commit (no-ff); conflicts commit
nothing and conclude via merge-resolve!. 53/53 (118/118 total).

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-07-03 12:53:58 +00:00

192 lines
5.1 KiB
Bash
Executable File

#!/usr/bin/env bash
# lib/agentic/conformance.sh — run agentic-sx test suites, emit scoreboard.json + scoreboard.md.
#
# Environment:
#   SX_SERVER  path to the sx_server executable. Defaults to the OCaml build
#              output below the repository root; if that is not executable
#              the absolute /root/rose-ash build is tried instead.
set -uo pipefail

# All paths below are relative to the repository root, so the script must
# actually get there. A failing `git rev-parse` yields an empty string and
# `cd ""` silently stays in the current directory, so the result is captured
# and checked instead of being fed straight into `cd`.
REPO_ROOT=$(git rev-parse --show-toplevel 2>/dev/null) || REPO_ROOT=""
if [ -z "$REPO_ROOT" ]; then
  # Not inside a git work tree: fall back to the script's own location,
  # which is <root>/lib/agentic/ according to the header comment above.
  REPO_ROOT=$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." 2>/dev/null && pwd) || REPO_ROOT=""
fi
if [ -z "$REPO_ROOT" ] || ! cd "$REPO_ROOT"; then
  echo "ERROR: cannot locate the repository root." >&2
  exit 1
fi

SX_SERVER="${SX_SERVER:-hosts/ocaml/_build/default/bin/sx_server.exe}"
if [ ! -x "$SX_SERVER" ]; then
  SX_SERVER="/root/rose-ash/hosts/ocaml/_build/default/bin/sx_server.exe"
fi
if [ ! -x "$SX_SERVER" ]; then
  echo "ERROR: sx_server.exe not found." >&2
  exit 1
fi

# Suites to run, in scoreboard order, and the files the scoreboard is written to.
SUITES=(schema branch)
OUT_JSON="lib/agentic/scoreboard.json"
OUT_MD="lib/agentic/scoreboard.md"
# Shared prefix loaded by every suite:
# persist + artdag + datalog + sx-git + agentic schema.
# Prints one (load "...") form per line on stdout, in the order listed.
base_loads() {
  local -a sx_files=(
    spec/stdlib.sx
    lib/r7rs.sx
    lib/persist/event.sx
    lib/persist/backend.sx
    lib/persist/log.sx
    lib/persist/kv.sx
    lib/artdag/dag.sx
    lib/datalog/tokenizer.sx
    lib/datalog/parser.sx
    lib/datalog/unify.sx
    lib/datalog/db.sx
    lib/datalog/builtins.sx
    lib/datalog/aggregates.sx
    lib/datalog/strata.sx
    lib/datalog/eval.sx
    lib/datalog/api.sx
    lib/datalog/magic.sx
    lib/git/object.sx
    lib/git/ref.sx
    lib/git/dag.sx
    lib/git/worktree.sx
    lib/git/diff.sx
    lib/git/merge.sx
    lib/git/porcelain.sx
    lib/agentic/schema.sx
  )
  # printf re-applies the format once per argument, giving one form per file.
  printf '(load "%s")\n' "${sx_files[@]}"
}
# Relations stack (used by the branch/trace suites).
# Prints one (load "lib/relations/<module>.sx") form per line on stdout.
relations_loads() {
  local module
  for module in schema engine api explain federation tree; do
    printf '(load "lib/relations/%s.sx")\n' "$module"
  done
}
# Scheme + flow stack (used by the durable suite).
# Prints one (load "...") form per line on stdout, in the order listed.
flow_loads() {
  local -a sx_files=(
    lib/guest/lex.sx
    lib/guest/reflective/env.sx
    lib/guest/reflective/quoting.sx
    lib/scheme/parser.sx
    lib/scheme/eval.sx
    lib/scheme/runtime.sx
    lib/flow/spec.sx
    lib/flow/store.sx
    lib/flow/remote.sx
    lib/flow/host.sx
    lib/flow/api.sx
  )
  printf '(load "%s")\n' "${sx_files[@]}"
}
# Print every (load ...) form a suite needs, on stdout.
# Arguments: $1 - suite name.
# Every suite gets base_loads. branch, trace and durable additionally get the
# relations stack and branch.sx; trace and durable add trace.sx; durable also
# pulls in the flow stack (before branch.sx) and durable.sx. Any other name
# receives the base loads only.
suite_loads() {
  local which=$1
  base_loads
  case "$which" in
    branch | trace | durable) relations_loads ;;
    *) return 0 ;;
  esac
  if [ "$which" = durable ]; then
    flow_loads
  fi
  echo '(load "lib/agentic/branch.sx")'
  if [ "$which" != branch ]; then
    echo '(load "lib/agentic/trace.sx")'
  fi
  if [ "$which" = durable ]; then
    echo '(load "lib/agentic/durable.sx")'
  fi
}
# Run one test suite through the sx server and report its counters.
# Globals:   SX_SERVER (read) - executable that is fed the generated script.
# Arguments: $1 - suite name; its tests are loaded from lib/agentic/tests/<name>.sx
# Outputs:   exactly one line "<pass> <fail>" on stdout; diagnostics on stderr.
# Returns:   0 when a result was parsed, 1 otherwise.
#
# The generated script is split into four "(epoch N)" sections: library
# loads, the agentic-test harness definitions, the test file itself, and a
# final query for the two counters. The answer to epoch 4 is accepted in
# either of two shapes: an "(ok-len 4 ...)" line followed by "(P F)" on the
# next line, or a single "(ok 4 (P F))" line.
#
# When no well-formed "(P F)" answer is found (server crash, timeout, load
# error, unexpected reply) the suite is reported as "0 1" rather than "0 0",
# so a run that never executed cannot pass as green, and stray text never
# reaches the caller's arithmetic.
run_suite() {
  local suite=$1
  local file="lib/agentic/tests/${suite}.sx"
  local TMP
  if ! TMP=$(mktemp); then
    echo "run_suite: mktemp failed for suite '${suite}'; counting as 1 failure" >&2
    echo "0 1"
    return 1
  fi
  {
    echo "(epoch 1)"
    suite_loads "$suite"
    echo "(epoch 2)"
    echo '(eval "(define agentic-test-pass 0)")'
    echo '(eval "(define agentic-test-fail 0)")'
    echo '(eval "(define agentic-test-failures (list))")'
    echo '(eval "(define agentic-test (fn (name got expected) (if (equal? got expected) (set! agentic-test-pass (+ agentic-test-pass 1)) (begin (set! agentic-test-fail (+ agentic-test-fail 1)) (set! agentic-test-failures (append agentic-test-failures (list (list name (inspect got) (inspect expected)))))))))")'
    echo "(epoch 3)"
    echo "(load \"${file}\")"
    echo "(epoch 4)"
    echo '(eval "(list agentic-test-pass agentic-test-fail)")'
  } > "$TMP"
  local OUTPUT
  # The server's stderr is discarded; only its stdout replies are parsed.
  OUTPUT=$(timeout 300 "$SX_SERVER" < "$TMP" 2>/dev/null)
  rm -f "$TMP"
  local LINE
  # Shape 1: the line following "(ok-len 4 ...)".
  LINE=$(printf '%s\n' "$OUTPUT" | awk '/^\(ok-len 4 / {getline; print; exit}')
  if [ -z "$LINE" ]; then
    # Shape 2: "(ok 4 (P F))" - strip the wrapper, keep "(P F)".
    LINE=$(printf '%s\n' "$OUTPUT" | grep -E '^\(ok 4 \([0-9]+ [0-9]+\)\)' | tail -1 \
      | sed -E 's/^\(ok 4 //; s/\)$//')
  fi
  local counts_re='^\(([0-9]+) ([0-9]+)\)'
  if [[ "$LINE" =~ $counts_re ]]; then
    echo "${BASH_REMATCH[1]} ${BASH_REMATCH[2]}"
    return 0
  fi
  echo "run_suite: no usable result for suite '${suite}'; counting as 1 failure" >&2
  echo "0 1"
  return 1
}
# Per-suite counters keyed by suite name, plus running grand totals.
declare -A SUITE_PASS
declare -A SUITE_FAIL
TOTAL_PASS=0
TOTAL_FAIL=0
echo "Running agentic-sx conformance suite..." >&2
for s in "${SUITES[@]}"; do
  p=""
  f=""
  read -r p f < <(run_suite "$s")
  # Whatever run_suite printed ends up in arithmetic and %d conversions
  # below. Anything that is not two plain decimal numbers would raise an
  # arithmetic error (or an unbound-variable abort under `set -u`), so it is
  # recorded as a failed suite instead.
  if ! [[ "$p" =~ ^[0-9]+$ && "$f" =~ ^[0-9]+$ ]]; then
    echo "WARNING: suite '$s' produced no usable result; counting it as 1 failure." >&2
    p=0
    f=1
  fi
  # Force base 10 so a value with a leading zero is not read as octal.
  p=$((10#$p))
  f=$((10#$f))
  SUITE_PASS[$s]=$p
  SUITE_FAIL[$s]=$f
  TOTAL_PASS=$((TOTAL_PASS + p))
  TOTAL_FAIL=$((TOTAL_FAIL + f))
  printf " %-12s %d/%d\n" "$s" "$p" "$((p+f))" >&2
done
# scoreboard.json: one {"pass", "fail"} object per suite, then grand totals.
# The whole document, closing brace included, is produced by one redirected
# group so the file is written in a single pass.
{
  printf '{\n "suites": {\n'
  entry_sep=''
  for s in "${SUITES[@]}"; do
    # The separator is empty before the first entry and ",\n" afterwards,
    # so no trailing comma is ever emitted.
    printf '%s "%s": {"pass": %d, "fail": %d}' "$entry_sep" "$s" "${SUITE_PASS[$s]}" "${SUITE_FAIL[$s]}"
    entry_sep=$',\n'
  done
  printf '\n },\n'
  printf ' "%s": %d,\n' total_pass "$TOTAL_PASS" total_fail "$TOTAL_FAIL"
  printf ' "total": %d\n}\n' "$((TOTAL_PASS + TOTAL_FAIL))"
} > "$OUT_JSON"
# scoreboard.md: a Markdown table with one row per suite and a bold totals row.
{
  printf '%s\n\n' \
    '# agentic-sx Conformance Scoreboard' \
    '_Generated by `lib/agentic/conformance.sh`_'
  printf '%s\n' \
    '| Suite | Pass | Fail | Total |' \
    '|-------|-----:|-----:|------:|'
  for s in "${SUITES[@]}"; do
    passed=${SUITE_PASS[$s]}
    failed=${SUITE_FAIL[$s]}
    printf '| %s | %d | %d | %d |\n' "$s" "$passed" "$failed" "$((passed + failed))"
  done
  grand_total=$((TOTAL_PASS + TOTAL_FAIL))
  printf '| **Total** | **%d** | **%d** | **%d** |\n' "$TOTAL_PASS" "$TOTAL_FAIL" "$grand_total"
} > "$OUT_MD"
# Summarise on stderr; the script's exit status is that of the final test,
# i.e. success only when no failure was counted.
printf 'Wrote %s and %s\n' "$OUT_JSON" "$OUT_MD" >&2
printf 'Total: %s pass, %s fail\n' "$TOTAL_PASS" "$TOTAL_FAIL" >&2
test "$TOTAL_FAIL" -eq 0