Per-agent buffer = persist append-only log stream + kv drain cursor; commit-with-trace! drains everything-since-last-commit into a console-trace object and binds it git-note style (ref notes/trace/<commit-cid> -> trace cid). Trace never enters the commit tree; binding is a re-bindable ref layer over immutable objects; failed commits keep the buffer; plain commit! leaves binding to the agent. 35/35 (153/153 total). Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
192 lines
5.2 KiB
Bash
Executable File
192 lines
5.2 KiB
Bash
Executable File
#!/usr/bin/env bash
# lib/agentic/conformance.sh — run agentic-sx test suites, emit scoreboard.json + scoreboard.md.
#
# Environment:
#   SX_SERVER  path to the sx_server executable (optional; see defaults below).
#
# Exit status: 0 when no test failed, non-zero otherwise.

# Note: -e is not enabled, so a failing command does not abort the script by
# itself; steps whose failure matters are checked explicitly.
set -uo pipefail

# Every path used below is relative to the repository root, so refuse to run
# from anywhere else instead of silently continuing in the wrong directory.
repo_root=$(git rev-parse --show-toplevel) || {
  echo "ERROR: not inside a git repository." >&2
  exit 1
}
cd "$repo_root" || {
  echo "ERROR: cannot cd to ${repo_root}." >&2
  exit 1
}

# Resolve the server binary: caller override or in-tree build first, then a
# fixed absolute fallback location.
SX_SERVER="${SX_SERVER:-hosts/ocaml/_build/default/bin/sx_server.exe}"
if [[ ! -x "$SX_SERVER" ]]; then
  SX_SERVER="/root/rose-ash/hosts/ocaml/_build/default/bin/sx_server.exe"
fi
if [[ ! -x "$SX_SERVER" ]]; then
  echo "ERROR: sx_server.exe not found." >&2
  exit 1
fi

# Suites to run, in scoreboard order; each name selects
# lib/agentic/tests/<name>.sx.
SUITES=(schema branch trace)

# Generated report files.
OUT_JSON="lib/agentic/scoreboard.json"
OUT_MD="lib/agentic/scoreboard.md"

# shared prefix: persist + artdag + datalog + sx-git + agentic schema
#
# Prints one (load "<path>") form per line on stdout, in load order.
base_loads() {
  local -a modules=(
    spec/stdlib.sx
    lib/r7rs.sx
    lib/persist/event.sx
    lib/persist/backend.sx
    lib/persist/log.sx
    lib/persist/kv.sx
    lib/artdag/dag.sx
    lib/datalog/tokenizer.sx
    lib/datalog/parser.sx
    lib/datalog/unify.sx
    lib/datalog/db.sx
    lib/datalog/builtins.sx
    lib/datalog/aggregates.sx
    lib/datalog/strata.sx
    lib/datalog/eval.sx
    lib/datalog/api.sx
    lib/datalog/magic.sx
    lib/git/object.sx
    lib/git/ref.sx
    lib/git/dag.sx
    lib/git/worktree.sx
    lib/git/diff.sx
    lib/git/merge.sx
    lib/git/porcelain.sx
    lib/agentic/schema.sx
  )
  # printf re-applies the format to each array element: one form per line.
  printf '(load "%s")\n' "${modules[@]}"
}

# relations stack (branch/trace suites)
#
# Prints one (load "lib/relations/<module>.sx") form per line on stdout.
relations_loads() {
  local module
  for module in schema engine api explain federation tree; do
    printf '(load "lib/relations/%s.sx")\n' "$module"
  done
}

# scheme + flow stack (durable suite)
#
# Prints one (load "<path>") form per line on stdout, in load order.
flow_loads() {
  local -a sources=(
    lib/guest/lex.sx
    lib/guest/reflective/env.sx
    lib/guest/reflective/quoting.sx
    lib/scheme/parser.sx
    lib/scheme/eval.sx
    lib/scheme/runtime.sx
    lib/flow/spec.sx
    lib/flow/store.sx
    lib/flow/remote.sx
    lib/flow/host.sx
    lib/flow/api.sx
  )
  local src
  for src in "${sources[@]}"; do
    printf '(load "%s")\n' "$src"
  done
}

# Print every (load ...) form a suite needs, in dependency order.
# Arguments: $1 - suite name
# Outputs:   the shared base loads, followed (for branch, trace and durable)
#            by the relations stack, the flow stack (durable only) and the
#            suite's lib/agentic modules. Any other name gets the base only.
suite_loads() {
  local suite=$1
  local -a agentic_modules

  base_loads

  # Each suite builds on the previous one's agentic modules.
  case "$suite" in
    branch)  agentic_modules=(branch) ;;
    trace)   agentic_modules=(branch trace) ;;
    durable) agentic_modules=(branch trace durable) ;;
    *)       return 0 ;;
  esac

  relations_loads
  if [[ "$suite" == durable ]]; then
    flow_loads
  fi

  local module
  for module in "${agentic_modules[@]}"; do
    printf '(load "lib/agentic/%s.sx")\n' "$module"
  done
}

# Run one suite through the sx server and report its tally.
# Globals:   SX_SERVER (read) - server executable fed the session on stdin
# Arguments: $1 - suite name; tests are loaded from lib/agentic/tests/<suite>.sx
# Outputs:   "<pass> <fail>" on stdout. When no tally can be parsed from the
#            server output (crash, timeout, load error) the suite is reported
#            as "0 1" with a diagnostic on stderr, so a broken run can never
#            look like a clean one.
# Returns:   0
run_suite() {
  local suite=$1
  local file="lib/agentic/tests/${suite}.sx"

  local TMP
  if ! TMP=$(mktemp); then
    echo "ERROR: ${suite}: mktemp failed." >&2
    echo "0 1"
    return 0
  fi

  # Session script: epoch 1 loads libraries, epoch 2 defines the test
  # harness, epoch 3 loads the test file, epoch 4 asks for the counters.
  {
    echo "(epoch 1)"
    suite_loads "$suite"
    echo "(epoch 2)"
    echo '(eval "(define agentic-test-pass 0)")'
    echo '(eval "(define agentic-test-fail 0)")'
    echo '(eval "(define agentic-test-failures (list))")'
    echo '(eval "(define agentic-test (fn (name got expected) (if (equal? got expected) (set! agentic-test-pass (+ agentic-test-pass 1)) (begin (set! agentic-test-fail (+ agentic-test-fail 1)) (set! agentic-test-failures (append agentic-test-failures (list (list name (inspect got) (inspect expected)))))))))")'
    echo "(epoch 3)"
    echo "(load \"${file}\")"
    echo "(epoch 4)"
    echo '(eval "(list agentic-test-pass agentic-test-fail)")'
  } > "$TMP"

  local OUTPUT
  # The server's stderr is discarded; only the stdout protocol lines are parsed.
  OUTPUT=$(timeout 300 "$SX_SERVER" < "$TMP" 2>/dev/null)
  rm -f -- "$TMP"

  # The epoch-4 answer arrives either as "(ok-len 4 N)" followed by the value
  # on the next line, or inline as "(ok 4 (P F))".
  local LINE
  LINE=$(awk '/^\(ok-len 4 / {getline; print; exit}' <<< "$OUTPUT")
  if [[ -z "$LINE" ]]; then
    LINE=$(grep -E '^\(ok 4 \([0-9]+ [0-9]+\)\)' <<< "$OUTPUT" | tail -1 \
      | sed -E 's/^\(ok 4 //; s/\)$//')
  fi

  # Accept only a well-formed "(P F)" prefix; anything else is a failed run.
  local tally_re='^\(([0-9]+) ([0-9]+)\)'
  if [[ "$LINE" =~ $tally_re ]]; then
    # 10# forces base 10 so a leading zero is never read as octal downstream.
    echo "$((10#${BASH_REMATCH[1]})) $((10#${BASH_REMATCH[2]}))"
  else
    echo "WARNING: ${suite}: no test tally in server output; counting as 1 failure." >&2
    echo "0 1"
  fi
}

# Per-suite tallies keyed by suite name, plus running grand totals.
declare -A SUITE_PASS
declare -A SUITE_FAIL
TOTAL_PASS=0
TOTAL_FAIL=0

# Run each suite, record its "<pass> <fail>" tally and echo progress on stderr.
echo "Running agentic-sx conformance suite..." >&2
for s in "${SUITES[@]}"; do
  read -r p f < <(run_suite "$s")
  SUITE_PASS[$s]=$p
  SUITE_FAIL[$s]=$f
  TOTAL_PASS=$(( TOTAL_PASS + p ))
  TOTAL_FAIL=$(( TOTAL_FAIL + f ))
  printf " %-12s %d/%d\n" "$s" "$p" "$(( p + f ))" >&2
done
TOTAL_ALL=$(( TOTAL_PASS + TOTAL_FAIL ))

# scoreboard.json
{
  printf '{\n'
  printf ' "suites": {\n'
  # The separator is empty before the first entry and ",\n" before the rest,
  # so entries are comma-joined without a trailing comma.
  sep=''
  for s in "${SUITES[@]}"; do
    printf '%s "%s": {"pass": %d, "fail": %d}' \
      "$sep" "$s" "${SUITE_PASS[$s]}" "${SUITE_FAIL[$s]}"
    sep=$',\n'
  done
  printf '\n },\n'
  printf ' "total_pass": %d,\n' "$TOTAL_PASS"
  printf ' "total_fail": %d,\n' "$TOTAL_FAIL"
  printf ' "total": %d\n' "$TOTAL_ALL"
  printf '}\n'
} > "$OUT_JSON"

# scoreboard.md
{
  cat << 'HEADER'
# agentic-sx Conformance Scoreboard

_Generated by `lib/agentic/conformance.sh`_

| Suite | Pass | Fail | Total |
|-------|-----:|-----:|------:|
HEADER
  for s in "${SUITES[@]}"; do
    p=${SUITE_PASS[$s]}
    f=${SUITE_FAIL[$s]}
    printf '| %s | %d | %d | %d |\n' "$s" "$p" "$f" "$(( p + f ))"
  done
  printf '| **Total** | **%d** | **%d** | **%d** |\n' \
    "$TOTAL_PASS" "$TOTAL_FAIL" "$TOTAL_ALL"
} > "$OUT_MD"

echo "Wrote $OUT_JSON and $OUT_MD" >&2
echo "Total: $TOTAL_PASS pass, $TOTAL_FAIL fail" >&2

# The script's exit status: success only when no test failed.
(( TOTAL_FAIL == 0 ))