Merge lib/guest/test-runner into architecture: test-runner.sx + Kernel migration (POC)

This commit is contained in:
2026-05-14 20:18:03 +00:00
9 changed files with 154 additions and 105 deletions

50
lib/guest/test-runner.sx Normal file
View File

@@ -0,0 +1,50 @@
;; lib/guest/test-runner.sx — per-suite test harness for guest test files.
;;
;; Across the codebase 142+ test files implement the identical four-form
;; boilerplate: `<X>-test-pass`, `<X>-test-fail`, `<X>-test-fails`, and
;; an `<X>-test` recording function. Only the prefix differs. This kit
;; collapses the boilerplate to a per-suite mutable dict + a recording
;; helper, so each test file goes from ~12 lines of harness to ~3:
;;
;; (define ke-suite (refl-make-test-suite))
;; (define ke-test (fn (n a e) (refl-test ke-suite n a e)))
;; (define ke-tests-run! (fn () (refl-test-report ke-suite)))
;;
;; The suite is a mutable dict `{:pass N :fail N :fails LIST}`. Each
;; failed assertion appends `{:name NAME :expected EXPECTED :actual ACT}`
;; to :fails — same shape every existing harness already produces.
;;
;; The `:fails` list is mutated in place via `append!`, so callers who
;; have a reference to it see the same updates. (Same semantic the
;; existing per-suite globals had — just held in the suite dict now.)
;;
;; Public API
;; (refl-make-test-suite) — fresh suite
;; (refl-test SUITE NAME ACT EXP) — record one assertion
;; (refl-test-report SUITE) — return {:total :passed :failed :fails}
;; (refl-test-pass? SUITE) — convenience: all green?
;; (refl-test-suite? V) — predicate
(define refl-make-test-suite (fn () {:fail 0 :pass 0 :fails (list)}))
(define
refl-test-suite?
(fn
(v)
(and (dict? v) (number? (get v :pass)) (number? (get v :fail)))))
(define
refl-test
(fn
(suite name actual expected)
(cond
((= actual expected)
(dict-set! suite :pass (+ (get suite :pass) 1)))
(:else
(begin
(dict-set! suite :fail (+ (get suite :fail) 1))
(append! (get suite :fails) {:name name :actual actual :expected expected}))))))
(define refl-test-report (fn (suite) {:total (+ (get suite :pass) (get suite :fail)) :passed (get suite :pass) :failed (get suite :fail) :fails (get suite :fails)}))
(define refl-test-pass? (fn (suite) (= (get suite :fail) 0)))

View File

@@ -5,20 +5,8 @@
;; identity is per-call, so two `(make-encapsulation-type)` calls
;; produce non-interchangeable families.
(define ken-test-pass 0)
(define ken-test-fail 0)
(define ken-test-fails (list))
(define
ken-test
(fn
(name actual expected)
(if
(= actual expected)
(set! ken-test-pass (+ ken-test-pass 1))
(begin
(set! ken-test-fail (+ ken-test-fail 1))
(append! ken-test-fails {:name name :actual actual :expected expected})))))
(define ken-suite (refl-make-test-suite))
(define ken-test (fn (n a e) (refl-test ken-suite n a e)))
(define ken-eval-in (fn (src env) (kernel-eval (kernel-parse src) env)))
@@ -180,4 +168,4 @@
env))
(list true 7))
(define ken-tests-run! (fn () {:total (+ ken-test-pass ken-test-fail) :passed ken-test-pass :failed ken-test-fail :fails ken-test-fails}))
(define ken-tests-run! (fn () (refl-test-report ken-suite)))

View File

@@ -5,20 +5,8 @@
;; ($if, $define!, $lambda, …) arrive in Phase 4, so tests build a
;; minimal env on the fly and verify the dispatch contract directly.
(define ke-test-pass 0)
(define ke-test-fail 0)
(define ke-test-fails (list))
(define
ke-test
(fn
(name actual expected)
(if
(= actual expected)
(set! ke-test-pass (+ ke-test-pass 1))
(begin
(set! ke-test-fail (+ ke-test-fail 1))
(append! ke-test-fails {:name name :actual actual :expected expected})))))
(define ke-suite (refl-make-test-suite))
(define ke-test (fn (n a e) (refl-test ke-suite n a e)))
;; ── helpers ──────────────────────────────────────────────────────
@@ -267,4 +255,4 @@
(kernel-eval (kernel-parse "(x 1)") env)))
:raised)
(define ke-tests-run! (fn () {:total (+ ke-test-pass ke-test-fail) :passed ke-test-pass :failed ke-test-fail :fails ke-test-fails}))
(define ke-tests-run! (fn () (refl-test-report ke-suite)))

View File

@@ -10,20 +10,8 @@
;; provenance markers) is research-grade and is NOT implemented — see
;; the plan's reflective-API notes for the proposed approach.
(define kh-test-pass 0)
(define kh-test-fail 0)
(define kh-test-fails (list))
(define
kh-test
(fn
(name actual expected)
(if
(= actual expected)
(set! kh-test-pass (+ kh-test-pass 1))
(begin
(set! kh-test-fail (+ kh-test-fail 1))
(append! kh-test-fails {:name name :actual actual :expected expected})))))
(define kh-suite (refl-make-test-suite))
(define kh-test (fn (n a e) (refl-test kh-suite n a e)))
(define kh-eval-in (fn (src env) (kernel-eval (kernel-parse src) env)))
@@ -217,4 +205,4 @@
(kh-eval-in "($let ((x 5)) ($define! tmp (+ x 1)) tmp)"
(kernel-standard-env)) 6)
(define kh-tests-run! (fn () {:total (+ kh-test-pass kh-test-fail) :passed kh-test-pass :failed kh-test-fail :fails kh-test-fails}))
(define kh-tests-run! (fn () (refl-test-report kh-suite)))

View File

@@ -12,20 +12,8 @@
;; first-class evaluators all line up — enough so a Kernel program
;; can itself reason about Kernel programs.
(define kmc-test-pass 0)
(define kmc-test-fail 0)
(define kmc-test-fails (list))
(define
kmc-test
(fn
(name actual expected)
(if
(= actual expected)
(set! kmc-test-pass (+ kmc-test-pass 1))
(begin
(set! kmc-test-fail (+ kmc-test-fail 1))
(append! kmc-test-fails {:name name :actual actual :expected expected})))))
(define kmc-suite (refl-make-test-suite))
(define kmc-test (fn (n a e) (refl-test kmc-suite n a e)))
;; Build a Kernel env with m-eval and m-apply defined. The two refer
;; to each other and to standard primitives, so we use the standard
@@ -159,4 +147,4 @@
env))
49)
(define kmc-tests-run! (fn () {:total (+ kmc-test-pass kmc-test-fail) :passed kmc-test-pass :failed kmc-test-fail :fails kmc-test-fails}))
(define kmc-tests-run! (fn () (refl-test-report kmc-suite)))

View File

@@ -1,19 +1,7 @@
;; lib/kernel/tests/parse.sx — exercises lib/kernel/parser.sx.
(define knl-test-pass 0)
(define knl-test-fail 0)
(define knl-test-fails (list))
(define
knl-test
(fn
(name actual expected)
(if
(= actual expected)
(set! knl-test-pass (+ knl-test-pass 1))
(begin
(set! knl-test-fail (+ knl-test-fail 1))
(append! knl-test-fails {:name name :actual actual :expected expected})))))
(define knl-suite (refl-make-test-suite))
(define knl-test (fn (n a e) (refl-test knl-suite n a e)))
;; ── atoms: numbers ────────────────────────────────────────────────
(knl-test "num: integer" (kernel-parse "42") 42)
@@ -155,4 +143,4 @@
(kernel-parse "(a 'b c)")
(list "a" (list "$quote" "b") "c"))
(define knl-tests-run! (fn () {:total (+ knl-test-pass knl-test-fail) :passed knl-test-pass :failed knl-test-fail :fails knl-test-fails}))
(define knl-tests-run! (fn () (refl-test-report knl-suite)))

View File

@@ -5,20 +5,8 @@
;; first-class environment manipulation. Each test starts from a fresh
;; standard env via `(kernel-standard-env)`.
(define ks-test-pass 0)
(define ks-test-fail 0)
(define ks-test-fails (list))
(define
ks-test
(fn
(name actual expected)
(if
(= actual expected)
(set! ks-test-pass (+ ks-test-pass 1))
(begin
(set! ks-test-fail (+ ks-test-fail 1))
(append! ks-test-fails {:name name :actual actual :expected expected})))))
(define ks-suite (refl-make-test-suite))
(define ks-test (fn (n a e) (refl-test ks-suite n a e)))
(define
ks-eval
@@ -442,4 +430,4 @@
(ks-test "reverse: double reverse is identity"
(ks-eval "(reverse (reverse (list 1 2 3)))") (list 1 2 3))
(define ks-tests-run! (fn () {:total (+ ks-test-pass ks-test-fail) :passed ks-test-pass :failed ks-test-fail :fails ks-test-fails}))
(define ks-tests-run! (fn () (refl-test-report ks-suite)))

View File

@@ -5,20 +5,8 @@
;; base-env, bind a few helper applicatives (+, *, list, =, $if), and
;; run programs that construct and use custom combiners.
(define kv-test-pass 0)
(define kv-test-fail 0)
(define kv-test-fails (list))
(define
kv-test
(fn
(name actual expected)
(if
(= actual expected)
(set! kv-test-pass (+ kv-test-pass 1))
(begin
(set! kv-test-fail (+ kv-test-fail 1))
(append! kv-test-fails {:name name :actual actual :expected expected})))))
(define kv-suite (refl-make-test-suite))
(define kv-test (fn (n a e) (refl-test kv-suite n a e)))
(define kv-eval-src (fn (src env) (kernel-eval (kernel-parse src) env)))
@@ -306,4 +294,4 @@
(kv-test "lambda: zero-arg multi-body"
(kv-eval-src "(($lambda () 1 2 3))" (kv-make-env)) 3)
(define kv-tests-run! (fn () {:total (+ kv-test-pass kv-test-fail) :passed kv-test-pass :failed kv-test-fail :fails kv-test-fails}))
(define kv-tests-run! (fn () (refl-test-report kv-suite)))

View File

@@ -0,0 +1,83 @@
# lib/guest/test-runner.sx — extraction plan
## Status
- [x] **Phase 1 — Kit + Kernel.** `lib/guest/test-runner.sx` extracted (5 forms, ~50 LoC). Seven Kernel test files migrated (`parse.sx`, `eval.sx`, `vau.sx`, `standard.sx`, `encap.sx`, `hygiene.sx`, `metacircular.sx`). 322/322 tests unchanged. 84 LoC removed.
- [ ] **Phase 2 — Per-guest migrations.** 35 additional test files match the standard `(define X-test-pass 0)` pattern across 7 guests; each needs the test runner's `(load "lib/guest/test-runner.sx")` line and a per-file harness rewrite. Variant shapes (Tcl, Smalltalk, APL alternate) need *different* extraction targets.
## The kit
```lisp
(refl-make-test-suite) {:pass 0 :fail 0 :fails (list)}
(refl-test SUITE NAME ACT EXP) mutates suite (dict-set! + append!)
(refl-test-report SUITE) {:total :passed :failed :fails}
(refl-test-pass? SUITE) bool
(refl-test-suite? V) predicate
```
Each migrated test file collapses from a 14-line harness boilerplate to:
```lisp
(define X-suite (refl-make-test-suite))
(define X-test (fn (n a e) (refl-test X-suite n a e)))
;; ... tests ...
(define X-tests-run! (fn () (refl-test-report X-suite)))
```
## Per-guest migration status
| Guest | Std-pattern files | Variant? | Migration commit |
|--------------|------------------:|---------------------|------------------|
| kernel | 7 | - | landed (Phase 1) |
| prolog | 23 | - | pending |
| common-lisp | 4 | - | pending |
| erlang | 4 | sub-prefix per file | pending |
| apl | 1 | uses `set!` over `append`, args are `got expected` | needs variant adapter |
| forth | 1 | - | pending |
| minikanren | 1 | - | pending |
| ruby | 1 | - | pending |
| tcl | 0 (uses different shape — see below) | `tcl-X-pass/-failures`, append! a STRING | needs variant kit |
| smalltalk | 0 (uses SUnit) | own framework | not applicable |
| haskell, hyperscript, js, lua | 0 | no per-suite harness | not applicable |
## Variant shapes that need their own extractions
### Tcl-flavour (`lib/tcl/tests/*.sx`)
```lisp
(define tcl-X-pass 0)
(define tcl-X-fail 0)
(define tcl-X-failures (list)) ; not -fails
(define tcl-assert
(fn (label expected actual) ; arg order: label, expected, actual
(if (= expected actual)
(set! tcl-X-pass (+ tcl-X-pass 1))
(begin (set! tcl-X-fail (+ tcl-X-fail 1))
(append! tcl-X-failures
(str label ": expected=" (str expected) " got=" (str actual)))))))
```
Difference from canonical: failures hold formatted *strings*, not dicts. Could be unified by passing a formatter cfg, but the API divergence is real (Tcl's report just lists strings; the kit's `:fails` holds dicts). Either:
- Add `refl-make-string-formatting-test-suite` variant, or
- Migrate Tcl's reports to dict-of-name-expected-actual (preferred — converges the shape).
### APL-flavour
```lisp
(define apl-test
(fn (name got expected) ; arg order: name, got, expected
(if (= got expected) ...
(set! apl-test-fails (append apl-test-fails (list {:got got :expected expected :name name}))))))
```
Difference: uses functional `append`, not `append!`. Argument order: `got expected`, not `actual expected`. Minor; renaming `n a e` in the kit call site to `n got expected` is the only change.
## Migration playbook (for the loop that finishes Phase 2)
1. Pick a guest from the table.
2. Run `/tmp/migrate_harness.py lib/<guest>/tests/*.sx`. SKIP entries are variant-shaped; ignore them or hand-migrate.
3. Add `(load "lib/guest/test-runner.sx")` to that guest's `test.sh` and `conformance.sh` (before the first guest .sx load that uses the harness).
4. Run the guest's full test suite; counts should be unchanged.
5. Commit `test-runner: migrate <guest> — N files, NN LoC saved`.
Estimated effort: 12 hours per guest, dominated by verification time. Prolog (23 files) is the biggest single commit but the most mechanical.