diff --git a/lib/mod/conformance.conf b/lib/mod/conformance.conf index 75243257..517be3c0 100644 --- a/lib/mod/conformance.conf +++ b/lib/mod/conformance.conf @@ -18,6 +18,7 @@ PRELOADS=( lib/mod/offenders.sx lib/mod/quorum.sx lib/mod/trace.sx + lib/mod/whatif.sx lib/mod/lifecycle.sx lib/mod/audit.sx lib/mod/api.sx @@ -38,4 +39,5 @@ SUITES=( "offenders:lib/mod/tests/offenders.sx:(mod-offenders-tests-run!)" "quorum:lib/mod/tests/quorum.sx:(mod-quorum-tests-run!)" "trace:lib/mod/tests/trace.sx:(mod-trace-tests-run!)" + "whatif:lib/mod/tests/whatif.sx:(mod-whatif-tests-run!)" ) diff --git a/lib/mod/scoreboard.json b/lib/mod/scoreboard.json index 6282142f..3c9acdc6 100644 --- a/lib/mod/scoreboard.json +++ b/lib/mod/scoreboard.json @@ -1,8 +1,8 @@ { "lang": "mod", - "total_passed": 247, + "total_passed": 260, "total_failed": 0, - "total": 247, + "total": 260, "suites": [ {"name":"decide","passed":31,"failed":0,"total":31}, {"name":"audit","passed":29,"failed":0,"total":29}, @@ -14,7 +14,8 @@ {"name":"severity","passed":14,"failed":0,"total":14}, {"name":"offenders","passed":19,"failed":0,"total":19}, {"name":"quorum","passed":9,"failed":0,"total":9}, - {"name":"trace","passed":15,"failed":0,"total":15} + {"name":"trace","passed":15,"failed":0,"total":15}, + {"name":"whatif","passed":13,"failed":0,"total":13} ], - "generated": "2026-06-06T18:48:10+00:00" + "generated": "2026-06-06T18:51:15+00:00" } diff --git a/lib/mod/scoreboard.md b/lib/mod/scoreboard.md index 04d8ae2d..365c2351 100644 --- a/lib/mod/scoreboard.md +++ b/lib/mod/scoreboard.md @@ -1,6 +1,6 @@ # mod scoreboard -**247 / 247 passing** (0 failure(s)). +**260 / 260 passing** (0 failure(s)). | Suite | Passed | Total | Status | |-------|--------|-------|--------| @@ -15,3 +15,4 @@ | offenders | 19 | 19 | ok | | quorum | 9 | 9 | ok | | trace | 15 | 15 | ok | +| whatif | 13 | 13 | ok | diff --git a/lib/mod/tests/whatif.sx b/lib/mod/tests/whatif.sx new file mode 100644 index 00000000..d4eb8099 --- /dev/null +++ b/lib/mod/tests/whatif.sx @@ -0,0 +1,117 @@ +;; lib/mod/tests/whatif.sx — Ext 10: policy what-if / impact analysis. + +(define mod-wi-count 0) +(define mod-wi-pass 0) +(define mod-wi-fail 0) +(define mod-wi-failures (list)) + +(define + mod-wi-test! + (fn + (name got expected) + (begin + (set! mod-wi-count (+ mod-wi-count 1)) + (if + (= got expected) + (set! mod-wi-pass (+ mod-wi-pass 1)) + (begin + (set! mod-wi-fail (+ mod-wi-fail 1)) + (append! + mod-wi-failures + (str name "\n expected: " expected "\n got: " got))))))) + +;; rules-b is the default policy with spam-hide removed: spam now falls through +;; to default-keep. A spam report flips hide → keep; everything else is unchanged. +(define mod-wi-rules-a mod/default-rules) +(define + mod-wi-rules-b + (list + (mod/mk-rule + "reviewer-remove" + :remove (list (list :evidence "confirmed-abuse"))) + (mod/mk-rule + "abuse-remove" + :remove (list (list :classification "abuse"))) + (mod/mk-rule + "repeated-escalate" + :escalate (list (list :count-at-least 3))) + (mod/mk-rule "default-keep" :keep (list)))) + +(define mod-wi-spam (mod/mk-report "r1" "a" "bob" "this is spam")) +(define mod-wi-abuse (mod/mk-report "r2" "a" "carol" "harassment here")) +(define mod-wi-clean (mod/mk-report "r3" "a" "dave" "a fine post")) + +;; ── single-report diff ── + +(define + mod-wi-d + (mod/decision-diff + mod-wi-spam + (list mod-wi-spam) + mod-wi-rules-a + mod-wi-rules-b)) +(mod-wi-test! "spam before = hide" (get mod-wi-d :before) "hide") +(mod-wi-test! "spam after = keep" (get mod-wi-d :after) "keep") +(mod-wi-test! "spam decision flips" (get mod-wi-d :changed) true) +(mod-wi-test! "diff carries report id" (get mod-wi-d :report-id) "r1") + +(define + mod-wi-da + (mod/decision-diff + mod-wi-abuse + (list mod-wi-abuse) + mod-wi-rules-a + mod-wi-rules-b)) +(mod-wi-test! "abuse unchanged (remove both)" (get mod-wi-da :changed) false) +(mod-wi-test! "abuse stays remove" (get mod-wi-da :after) "remove") + +(define + mod-wi-dc + (mod/decision-diff + mod-wi-clean + (list mod-wi-clean) + mod-wi-rules-a + mod-wi-rules-b)) +(mod-wi-test! "clean unchanged (keep both)" (get mod-wi-dc :changed) false) + +;; ── batch impact ── + +(define mod-wi-batch (list mod-wi-spam mod-wi-abuse mod-wi-clean)) +(define + mod-wi-impact + (mod/policy-impact mod-wi-batch mod-wi-rules-a mod-wi-rules-b)) + +(mod-wi-test! + "impact lists only changed reports" + (len mod-wi-impact) + 1) +(mod-wi-test! + "impacted report is the spam one" + (get (first mod-wi-impact) :report-id) + "r1") +(mod-wi-test! + "impact-count agrees" + (mod/impact-count mod-wi-batch mod-wi-rules-a mod-wi-rules-b) + 1) + +;; ── identical rule sets → no impact ── + +(mod-wi-test! + "same rules → zero impact" + (mod/impact-count mod-wi-batch mod-wi-rules-a mod-wi-rules-a) + 0) +(mod-wi-test! + "same rules → empty report" + (mod/impact-report mod-wi-batch mod-wi-rules-a mod-wi-rules-a) + "No decisions change.") + +;; ── rendering ── + +(mod-wi-test! + "impact-report renders the flip" + (mod/str-contains? + (mod/impact-report mod-wi-batch mod-wi-rules-a mod-wi-rules-b) + "r1: hide → keep") + true) + +(define mod-whatif-tests-run! (fn () {:failures mod-wi-failures :total mod-wi-count :passed mod-wi-pass :failed mod-wi-fail})) diff --git a/lib/mod/whatif.sx b/lib/mod/whatif.sx new file mode 100644 index 00000000..23b24eac --- /dev/null +++ b/lib/mod/whatif.sx @@ -0,0 +1,56 @@ +;; lib/mod/whatif.sx — policy what-if / impact analysis. +;; +;; Before shipping a policy change, a moderation team needs to know which past or +;; pending reports would decide differently. mod/decision-diff compares one +;; report's action under two rule sets; mod/policy-impact runs a whole batch and +;; returns only the reports whose decision flips. Pure SX over decide-report. + +(define + mod/decision-diff + (fn + (r reports rules-a rules-b) + (let + ((a (get (mod/decide-report r reports rules-a) :action)) + (b (get (mod/decide-report r reports rules-b) :action))) + {:after b :changed (if (= a b) false true) :report-id (mod/report-id r) :before a}))) + +(define + mod/policy-impact + (fn + (reports rules-a rules-b) + (reduce + (fn + (acc r) + (let + ((d (mod/decision-diff r reports rules-a rules-b))) + (if (get d :changed) (append acc (list d)) acc))) + (list) + reports))) + +(define + mod/impact-count + (fn + (reports rules-a rules-b) + (len (mod/policy-impact reports rules-a rules-b)))) + +(define + mod/impact-report + (fn + (reports rules-a rules-b) + (let + ((changed (mod/policy-impact reports rules-a rules-b))) + (if + (empty? changed) + "No decisions change." + (mod/join-with + "\n" + (map + (fn + (d) + (str + (get d :report-id) + ": " + (get d :before) + " → " + (get d :after))) + changed)))))) diff --git a/plans/mod-on-sx.md b/plans/mod-on-sx.md index 7ff76ea3..5e074024 100644 --- a/plans/mod-on-sx.md +++ b/plans/mod-on-sx.md @@ -16,7 +16,7 @@ federation extension. ## Status (rolling) -`bash lib/mod/conformance.sh` → **247/247** (roadmap + 9 extensions complete) +`bash lib/mod/conformance.sh` → **260/260** (roadmap + 10 extensions complete) ## Ground rules @@ -147,6 +147,11 @@ lib/mod/fed.sx derivation goal-by-goal with `[proved]`/`[unproved]` marks and unification bindings. E.g. `Report rc: escalate (rule: repeated-escalate)` … `[proved] report(rc, B, S), report_count(S, N), N >= 3 {B=ann, N=3, S=dave}`. +- [x] **Ext 10 — policy what-if / impact** (`lib/mod/whatif.sx`, +13). + `mod/decision-diff` compares one report's action under two rule sets; + `mod/policy-impact` runs a batch and returns only the reports whose decision + flips; `mod/impact-count` / `mod/impact-report` summarize. Lets a team measure a + policy change before shipping it (e.g. "removing spam-hide flips r1 hide→keep"). - [x] **Ext 9 — policy dry-run trace** (`lib/mod/trace.sx`, +15). `mod/trace-rules` evaluates a report against every rule and returns each rule's proved/unproved status + its goal-by-goal derivation, so an unproved rule shows which goal @@ -186,6 +191,12 @@ lib/mod/fed.sx ## Progress log +- **Ext 10 — policy what-if / impact, 260/260** (+13). Decisions are now + comparable across rule sets — diff one report, or batch a whole set and surface + only the flips. Pure SX over `decide-report`, no engine change. Closes the + policy-authoring loop alongside lint (Ext 5) and trace (Ext 9): lint checks + well-formedness, trace explains one report, what-if measures a change's blast + radius before it ships. - **Ext 9 — policy dry-run trace, 247/247** (+15). Whole-rule-set diagnostics over the proof machinery: every rule's fire/no-fire and the goal that decided it. The winner agrees with `decide-report` by construction (first proved = pl-query-one),