From 8dfc987095af950d06f15608ce697c86343db119 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 6 Jun 2026 17:30:50 +0000 Subject: [PATCH] =?UTF-8?q?mod:=20Phase=201=20=E2=80=94=20report=20schema?= =?UTF-8?q?=20+=20policy=20engine=20on=20Prolog,=2031/31?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reports → Prolog facts (report/3, classification/2, report_count/2); ordered policy rules compile to policy_action/3 clauses, first match wins via pl-query-one. Decisions carry their proof (matching rule + conditions + evidence). Spam/abuse keyword classification, repeated-report escalation via Prolog join+arithmetic, no-rule→keep default. Registry api + conformance harness. Co-Authored-By: Claude Opus 4.8 (1M context) --- lib/mod/api.sx | 44 ++++++++ lib/mod/conformance.conf | 21 ++++ lib/mod/conformance.sh | 3 + lib/mod/engine.sx | 45 ++++++++ lib/mod/policy.sx | 71 +++++++++++++ lib/mod/schema.sx | 120 ++++++++++++++++++++++ lib/mod/scoreboard.json | 10 ++ lib/mod/scoreboard.md | 7 ++ lib/mod/tests/decide.sx | 215 +++++++++++++++++++++++++++++++++++++++ plans/mod-on-sx.md | 46 ++++++--- 10 files changed, 568 insertions(+), 14 deletions(-) create mode 100644 lib/mod/api.sx create mode 100644 lib/mod/conformance.conf create mode 100755 lib/mod/conformance.sh create mode 100644 lib/mod/engine.sx create mode 100644 lib/mod/policy.sx create mode 100644 lib/mod/schema.sx create mode 100644 lib/mod/scoreboard.json create mode 100644 lib/mod/scoreboard.md create mode 100644 lib/mod/tests/decide.sx diff --git a/lib/mod/api.sx b/lib/mod/api.sx new file mode 100644 index 00000000..929c39ea --- /dev/null +++ b/lib/mod/api.sx @@ -0,0 +1,44 @@ +;; lib/mod/api.sx — report registry + public entry points. +;; +;; mod/report files a report (assigning a sequential id) into the in-memory +;; registry; mod/decide resolves an id and runs the policy engine against the +;; current registry and rule set. 
+
+(define mod/*reports* (list)) ;; in-memory registry; mod/report appends every filed report here
+(define mod/*counter* 0) ;; how many ids have been issued; the next report becomes "r<counter+1>"
+(define mod/*rules* mod/default-rules) ;; rule set mod/decide hands to the engine; mod/reset! leaves it alone
+
+(define
+  mod/reset! ;; empty the registry and restart id numbering at r1
+  (fn
+    ()
+    (begin (set! mod/*reports* (list)) (set! mod/*counter* 0))))
+
+(define
+  mod/report ;; file a report about `about` by `by`; returns the stored report dict
+  (fn
+    (by about reason)
+    (begin
+      (set! mod/*counter* (+ mod/*counter* 1)) ;; bump first so the id below sees the new value
+      (let
+        ((id (str "r" mod/*counter*)))
+        (let
+          ((r (mod/mk-report id by about reason)))
+          (begin (append! mod/*reports* r) r))))))
+
+(define
+  mod/get-report ;; the registered report whose id equals `id`, or nil
+  (fn
+    (id)
+    (reduce
+      (fn (acc r) (if (= (mod/report-id r) id) r acc)) ;; no early exit: full scan, the last match is kept
+      nil
+      mod/*reports*)))
+
+(define
+  mod/decide ;; run the policy engine for report `id`; nil when no such report is registered
+  (fn
+    (id)
+    (let
+      ((r (mod/get-report id)))
+      (if (nil? r) nil (mod/decide-report r mod/*reports* mod/*rules*)))))
diff --git a/lib/mod/conformance.conf b/lib/mod/conformance.conf
new file mode 100644
index 00000000..b2c0b751
--- /dev/null
+++ b/lib/mod/conformance.conf
@@ -0,0 +1,21 @@
+# Mod conformance config — sourced by lib/guest/conformance.sh.
+
+LANG_NAME=mod
+MODE=dict
+
+PRELOADS=(
+  lib/guest/pratt.sx
+  lib/prolog/tokenizer.sx
+  lib/prolog/parser.sx
+  lib/prolog/runtime.sx
+  lib/prolog/query.sx
+  lib/prolog/compiler.sx
+  lib/mod/schema.sx
+  lib/mod/policy.sx
+  lib/mod/engine.sx
+  lib/mod/api.sx
+)
+
+SUITES=(
+  "decide:lib/mod/tests/decide.sx:(mod-decide-tests-run!)"
+)
diff --git a/lib/mod/conformance.sh b/lib/mod/conformance.sh
new file mode 100755
index 00000000..79c1452b
--- /dev/null
+++ b/lib/mod/conformance.sh
@@ -0,0 +1,3 @@
+#!/usr/bin/env bash
+# Thin wrapper — see lib/guest/conformance.sh and lib/mod/conformance.conf.
+exec bash "$(dirname "$0")/../guest/conformance.sh" "$(dirname "$0")/conformance.conf" "$@"
diff --git a/lib/mod/engine.sx b/lib/mod/engine.sx
new file mode 100644
index 00000000..20e7de5b
--- /dev/null
+++ b/lib/mod/engine.sx
@@ -0,0 +1,45 @@
+;; lib/mod/engine.sx — decide a report by querying the policy program.
+;;
+;; mod/build-program concatenates the report's facts with the compiled policy
+;; clauses. mod/decide-report loads that program, takes the policy_action/3
+;; answer that pl-query-one returns, and builds a decision dict: the action plus
+;; a proof (matching rule, its conditions, the derived evidence, the report
+;; count), so Phase 2's audit trail can persist the "why" with the verdict.
+
+(define
+  mod/find-rule ;; first rule in `rules` whose :name equals `name`; nil when none does
+  (fn
+    (rules name)
+    (reduce
+      (fn
+        (found candidate)
+        (cond ((nil? found) (if (= (mod/rule-name candidate) name) candidate nil)) (true found)))
+      nil
+      rules)))
+
+(define
+  mod/build-program ;; Prolog source text: the report's facts, a newline, then the rule clauses
+  (fn
+    (r count rules)
+    (let ((facts (mod/report-facts r count)) (clauses (mod/rules->program rules))) (str facts "\n" clauses))))
+
+(define
+  mod/decide-report ;; decide report `r` against `rules`; `reports` supplies the per-subject count
+  (fn
+    (r reports rules)
+    (let
+      ((n (mod/report-count (mod/report-about r) reports))
+       (evidence (mod/classify-keywords r))
+       (rid (mod/report-id r)))
+      (let
+        ((mk-decision (fn (action rname conds) {:action action :proof {:evidence evidence :conditions conds :rule rname :count n} :report-id rid :rule rname}))
+         (kb (pl-load (mod/build-program r n rules))))
+        (let
+          ((answer (pl-query-one kb (str "policy_action(" rid ", Action, Rule)"))))
+          (if
+            (nil? answer) ;; no clause proved at all: fall back to keep with rule "none"
+            (mk-decision "keep" "none" (list))
+            (let
+              ((matched (dict-get answer "Rule")))
+              (let ((hit (mod/find-rule rules matched))) ;; action and conditions come from the rule, not the bindings
+                (mk-decision (mod/rule-action hit) matched (mod/rule-when hit))))))))))
diff --git a/lib/mod/policy.sx b/lib/mod/policy.sx
new file mode 100644
index 00000000..2649864e
--- /dev/null
+++ b/lib/mod/policy.sx
@@ -0,0 +1,71 @@
+;; lib/mod/policy.sx — moderation rules → Prolog clauses.
+;;
+;; A rule is {:name :action :when}. :when is a list of condition forms; each
+;; compiles to a Prolog goal. Rule order is precedence: the engine queries with
+;; pl-query-one, so the first clause that proves wins. The final default rule has
+;; an empty body (true) so every report yields at least :keep — "no rule matched"
+;; is a real result, not a query failure.
+
+(define mod/mk-rule (fn (name action conds) {:when conds :name name :action action})) ;; rule constructor; conds is a list of condition forms
+
+(define mod/rule-name (fn (r) (get r :name)))
+(define mod/rule-action (fn (r) (get r :action)))
+(define mod/rule-when (fn (r) (get r :when)))
+
+(define
+  mod/default-rules ;; ordered rule list: earlier entries take precedence over later ones
+  (list
+    (mod/mk-rule "spam-hide" :hide (list (list :classification "spam")))
+    (mod/mk-rule
+      "abuse-remove"
+      :remove (list (list :classification "abuse")))
+    (mod/mk-rule
+      "repeated-escalate"
+      :escalate (list (list :count-at-least 3)))
+    (mod/mk-rule "default-keep" :keep (list)))) ;; empty :when compiles to body `true`, so this always matches
+
+;; ── condition → Prolog goal ──
+;; Values are spliced in unquoted. An unrecognised tag compiles to `fail`.
+;; (:classification "spam") → classification(Id, spam)
+;; (:count-at-least 3) → report(Id, B, S), report_count(S, N), N >= 3
+
+(define
+  mod/cond->goal ;; compile one condition form (tag value) to a Prolog goal string
+  (fn
+    (c)
+    (let
+      ((tag (first c)))
+      (cond
+        ((= tag :classification)
+          (str "classification(Id, " (nth c 1) ")"))
+        ((= tag :count-at-least)
+          (str
+            "report(Id, B, S), report_count(S, N), N >= "
+            (nth c 1)))
+        (true "fail"))))) ;; fail closed: a misspelt or unsupported tag must never make its rule match everything
+
+(define
+  mod/conds->body ;; clause body: the goals joined with ", ", or `true` for an empty condition list
+  (fn
+    (conds)
+    (if
+      (empty? conds)
+      "true"
+      (mod/join-with ", " (map mod/cond->goal conds)))))
+
+(define
+  mod/rule->clause ;; one `policy_action(Id, <action>, '<name>') :- <body>.` clause for rule r
+  (fn
+    (r)
+    (str
+      "policy_action(Id, "
+      (mod/rule-action r)
+      ", "
+      (mod/pl-quote (mod/rule-name r)) ;; same quoting helper the fact generator in schema.sx uses
+      ") :- "
+      (mod/conds->body (mod/rule-when r))
+      ".")))
+
+(define
+  mod/rules->program ;; all clauses, one per line, in rule order (clause order is precedence)
+  (fn (rules) (mod/join-with "\n" (map mod/rule->clause rules))))
diff --git a/lib/mod/schema.sx b/lib/mod/schema.sx
new file mode 100644
index 00000000..c675a583
--- /dev/null
+++ b/lib/mod/schema.sx
@@ -0,0 +1,120 @@
+;; lib/mod/schema.sx — report representation + Prolog fact generation.
+;;
+;; A report is a dict {:id :by :about :reason}. The engine derives evidence
+;; (classification kinds) from the reason text, then projects the report and its
+;; evidence into Prolog facts that policy clauses can match against.
+
+(define mod/mk-report (fn (id by about reason) {:id id :by by :about about :reason reason})) ;; report constructor
+
+(define mod/report-id (fn (r) (get r :id)))
+(define mod/report-by (fn (r) (get r :by)))
+(define mod/report-about (fn (r) (get r :about)))
+(define mod/report-reason (fn (r) (get r :reason)))
+
+;; ── substring search (the prolog-loaded env lacks includes?; slice/len do work) ──
+
+(define
+  mod/contains-at? ;; true when needle occurs in hay at index pos or later; callers pass hl/nl as the two lengths
+  (fn
+    (hay needle hl nl pos)
+    (if
+      (< hl (+ pos nl)) ;; fewer than nl characters remain, so no match is possible
+      false
+      (if
+        (= (slice hay pos (+ pos nl)) needle)
+        true
+        (mod/contains-at? hay needle hl nl (+ pos 1)))))) ;; slide the window one position to the right
+
+(define
+  mod/str-contains? ;; substring test; an empty needle is always contained
+  (fn
+    (hay needle)
+    (let
+      ((hl (len hay)) (nl (len needle)))
+      (if
+        (= nl 0)
+        true
+        (mod/contains-at? hay needle hl nl 0)))))
+
+;; ── evidence derivation (keyword classification) ──
+
+(define
+  mod/spam-keywords ;; all lower-case: they are matched against the down-cased reason
+  (list "spam" "buy now" "click here" "free money" "viagra" "limited offer"))
+
+(define
+  mod/abuse-keywords ;; all lower-case, same matching as the spam list
+  (list "abuse" "harassment" "threat" "slur" "hate speech"))
+
+(define
+  mod/any? ;; true-ish when pred holds for some element; pred is not called again once a hit is found
+  (fn (pred coll) (reduce (fn (acc x) (if acc acc (pred x))) false coll)))
+
+(define
+  mod/reason-matches? ;; does the down-cased reason contain any of kws as a substring?
+  (fn
+    (reason kws)
+    (let
+      ((low (downcase reason)))
+      (mod/any? (fn (k) (mod/str-contains? low k)) kws))))
+
+(define
+  mod/classify-keywords ;; list of kind strings for report r: "spam" and/or "abuse", in that order; empty when neither matches
+  (fn
+    (r)
+    (let
+      ((reason (mod/report-reason r)) (kinds (list)))
+      (begin
+        (when
+          (mod/reason-matches? reason mod/spam-keywords)
+          (append! kinds "spam"))
+        (when
+          (mod/reason-matches? reason mod/abuse-keywords)
+          (append!
+            kinds "abuse"))
+        kinds))))
+
+(define
+  mod/report-count ;; number of entries in reports whose :about equals about
+  (fn
+    (about reports)
+    (reduce
+      (fn
+        (acc r)
+        (if (= (mod/report-about r) about) (+ acc 1) acc))
+      0
+      reports)))
+
+;; ── Prolog fact projection ──
+
+(define
+  mod/join-with ;; concatenate items with sep between them
+  (fn
+    (sep items)
+    (reduce (fn (acc x) (if (= acc "") x (str acc sep x))) "" items))) ;; NOTE(review): "" doubles as the "nothing yet" marker, so leading empty items are dropped without a separator
+;; NOTE(review): pl-quote does not escape an embedded ' — a :by/:about value containing one yields malformed Prolog text; confirm the tokenizer's escape syntax, then escape here.
+(define mod/pl-quote (fn (s) (str "'" s "'"))) ;; wrap s in single quotes so it reads as a quoted atom
+
+(define
+  mod/classification-facts ;; one `classification(<id>, <kind>).` line per kind; id and kind are spliced in unquoted
+  (fn
+    (id kinds)
+    (mod/join-with
+      "\n"
+      (map (fn (k) (str "classification(" id ", " k ").")) kinds))))
+
+(define
+  mod/report-facts ;; Prolog text for one report: report/3, report_count/2, then its classification facts
+  (fn
+    (r count)
+    (let
+      ((id (mod/report-id r))
+       (by (mod/pl-quote (mod/report-by r)))
+       (about (mod/pl-quote (mod/report-about r))))
+      (let
+        ((cls (mod/classification-facts id (mod/classify-keywords r))))
+        (mod/join-with
+          "\n"
+          (list
+            (str "report(" id ", " by ", " about ").") ;; the report id is spliced in unquoted
+            (str "report_count(" about ", " count ").")
+            cls)))))) ;; cls is "" when nothing was classified, which leaves a trailing newline
diff --git a/lib/mod/scoreboard.json b/lib/mod/scoreboard.json
new file mode 100644
index 00000000..d4374712
--- /dev/null
+++ b/lib/mod/scoreboard.json
@@ -0,0 +1,10 @@
+{
+  "lang": "mod",
+  "total_passed": 31,
+  "total_failed": 0,
+  "total": 31,
+  "suites": [
+    {"name":"decide","passed":31,"failed":0,"total":31}
+  ],
+  "generated": "2026-06-06T17:30:06+00:00"
+}
diff --git a/lib/mod/scoreboard.md b/lib/mod/scoreboard.md
new file mode 100644
index 00000000..36d7bba3
--- /dev/null
+++ b/lib/mod/scoreboard.md
@@ -0,0 +1,7 @@
+# mod scoreboard
+
+**31 / 31 passing** (0 failure(s)).
+
+| Suite | Passed | Total | Status |
+|-------|--------|-------|--------|
+| decide | 31 | 31 | ok |
diff --git a/lib/mod/tests/decide.sx b/lib/mod/tests/decide.sx
new file mode 100644
index 00000000..d903a10e
--- /dev/null
+++ b/lib/mod/tests/decide.sx
@@ -0,0 +1,215 @@
+;; lib/mod/tests/decide.sx — Phase 1: report representation + simple policy.
+
+(define mod-dec-count 0)
+(define mod-dec-pass 0)
+(define mod-dec-fail 0)
+(define mod-dec-failures (list))
+
+(define
+  mod-dec-test! ;; record one case: bump the tallies and, on mismatch, append a message to mod-dec-failures
+  (fn
+    (name got expected)
+    (begin
+      (set! mod-dec-count (+ mod-dec-count 1))
+      (if
+        (= got expected)
+        (set! mod-dec-pass (+ mod-dec-pass 1))
+        (begin
+          (set! mod-dec-fail (+ mod-dec-fail 1))
+          (append!
+            mod-dec-failures
+            (str name "\n expected: " expected "\n got: " got)))))))
+
+;; decide a single report (count over a 1-element registry)
+(define
+  mod-dec-one
+  (fn
+    (reason)
+    (let
+      ((r (mod/mk-report "r1" "alice" "bob" reason)))
+      (mod/decide-report r (list r) mod/default-rules))))
+
+(define mod-dec-action (fn (reason) (get (mod-dec-one reason) :action))) ;; just the :action of that decision
+
+;; ── spam keyword → :hide ──
+
+(mod-dec-test!
+  "spam keyword 'spam' → hide"
+  (mod-dec-action "this is spam")
+  "hide")
+(mod-dec-test!
+  "spam keyword 'buy now' → hide"
+  (mod-dec-action "buy now while stocks last")
+  "hide")
+(mod-dec-test!
+  "spam keyword case-insensitive 'CLICK HERE' → hide"
+  (mod-dec-action "CLICK HERE now")
+  "hide")
+(mod-dec-test!
+  "spam keyword 'free money' → hide"
+  (mod-dec-action "win free money fast")
+  "hide")
+
+;; ── abuse keyword → :remove ──
+
+(mod-dec-test!
+  "abuse keyword 'harassment' → remove"
+  (mod-dec-action "ongoing harassment of users")
+  "remove")
+(mod-dec-test!
+  "abuse keyword 'threat' → remove"
+  (mod-dec-action "this is a threat")
+  "remove")
+(mod-dec-test!
+  "abuse keyword 'slur' → remove"
+  (mod-dec-action "contains a slur")
+  "remove")
+
+;; ── no rule → :keep ──
+
+(mod-dec-test!
+  "neutral reason → keep"
+  (mod-dec-action "I disagree with this post")
+  "keep")
+(mod-dec-test! "empty reason → keep" (mod-dec-action "") "keep")
+
+;; ── decision carries the matching rule (proof, not bare keyword) ──
+
+(mod-dec-test!
+  "spam decision rule name"
+  (get (mod-dec-one "this is spam") :rule)
+  "spam-hide")
+(mod-dec-test!
+  "keep decision rule name"
+  (get (mod-dec-one "fine post") :rule)
+  "default-keep")
+(mod-dec-test!
+  "abuse decision rule name"
+  (get (mod-dec-one "harassment here") :rule)
+  "abuse-remove")
+(mod-dec-test!
+  "spam proof :rule"
+  (get (get (mod-dec-one "spam!") :proof) :rule)
+  "spam-hide")
+(mod-dec-test!
+  "spam proof :evidence"
+  (get (get (mod-dec-one "spam!") :proof) :evidence)
+  (list "spam"))
+(mod-dec-test!
+  "spam proof :count"
+  (get (get (mod-dec-one "spam!") :proof) :count)
+  1)
+
+;; ── classification (evidence derivation) ──
+
+(mod-dec-test!
+  "classify spam"
+  (mod/classify-keywords (mod/mk-report "r1" "a" "b" "spam!"))
+  (list "spam"))
+(mod-dec-test!
+  "classify abuse"
+  (mod/classify-keywords (mod/mk-report "r1" "a" "b" "abuse"))
+  (list "abuse"))
+(mod-dec-test!
+  "classify neutral → empty"
+  (mod/classify-keywords (mod/mk-report "r1" "a" "b" "hello"))
+  (list))
+(mod-dec-test!
+  "classify both spam+abuse"
+  (mod/classify-keywords (mod/mk-report "r1" "a" "b" "spam and abuse"))
+  (list "spam" "abuse"))
+
+;; ── report-count + repeated → :escalate ──
+
+(define
+  mod-dec-three
+  (list
+    (mod/mk-report "r1" "a" "bob" "x")
+    (mod/mk-report "r2" "c" "bob" "y")
+    (mod/mk-report "r3" "d" "bob" "z")))
+
+(mod-dec-test!
+  "report-count counts subject"
+  (mod/report-count "bob" mod-dec-three)
+  3)
+(mod-dec-test!
+  "3 reports about subject → escalate"
+  (get
+    (mod/decide-report (first mod-dec-three) mod-dec-three mod/default-rules)
+    :action)
+  "escalate")
+(mod-dec-test!
+  "escalate rule name"
+  (get
+    (mod/decide-report (first mod-dec-three) mod-dec-three mod/default-rules)
+    :rule)
+  "repeated-escalate")
+
+(define
+  mod-dec-two
+  (list
+    (mod/mk-report "r1" "a" "carol" "x")
+    (mod/mk-report "r2" "c" "carol" "y")))
+
+(mod-dec-test!
+  "2 reports about subject → keep (below threshold)"
+  (get
+    (mod/decide-report (first mod-dec-two) mod-dec-two mod/default-rules)
+    :action)
+  "keep")
+
+;; ── precedence: spam beats repeated ──
+
+(define
+  mod-dec-spam-among-many
+  (list
+    (mod/mk-report "r1" "a" "dave" "buy now spam")
+    (mod/mk-report "r2" "c" "dave" "y")
+    (mod/mk-report "r3" "d" "dave" "z")))
+
+(mod-dec-test!
+  "spam wins over repeated (precedence)"
+  (get
+    (mod/decide-report
+      (first mod-dec-spam-among-many)
+      mod-dec-spam-among-many
+      mod/default-rules)
+    :action)
+  "hide")
+
+;; ── accessors ──
+
+(mod-dec-test!
+  "report-about accessor"
+  (mod/report-about (mod/mk-report "r1" "a" "bob" "x"))
+  "bob")
+(mod-dec-test!
+  "report-by accessor"
+  (mod/report-by (mod/mk-report "r1" "alice" "bob" "x"))
+  "alice")
+
+;; ── api registry ──
+
+(mod/reset!)
+(define mod-dec-r1 (mod/report "alice" "bob" "this is spam"))
+(define mod-dec-r2 (mod/report "carol" "eve" "fine post"))
+
+(mod-dec-test!
+  "mod/report assigns sequential id r1"
+  (mod/report-id mod-dec-r1)
+  "r1")
+(mod-dec-test!
+  "mod/report assigns sequential id r2"
+  (mod/report-id mod-dec-r2)
+  "r2")
+(mod-dec-test!
+  "mod/decide via registry → hide"
+  (get (mod/decide "r1") :action)
+  "hide")
+(mod-dec-test!
+  "mod/decide via registry → keep"
+  (get (mod/decide "r2") :action)
+  "keep")
+(mod-dec-test! "mod/decide unknown id → nil" (mod/decide "r99") nil)
+
+(define mod-decide-tests-run! (fn () {:failures mod-dec-failures :total mod-dec-count :passed mod-dec-pass :failed mod-dec-fail})) ;; the cases above run at load time; this only reports the tallies
diff --git a/plans/mod-on-sx.md b/plans/mod-on-sx.md
index 30887b50..9f03ed8b 100644
--- a/plans/mod-on-sx.md
+++ b/plans/mod-on-sx.md
@@ -16,7 +16,7 @@ federation extension.
## Status (rolling) -`bash lib/mod/conformance.sh` → **0/0** (not yet started) +`bash lib/mod/conformance.sh` → **31/31** (Phase 1 complete) ## Ground rules @@ -66,17 +66,19 @@ lib/mod/fed.sx ## Phase 1 — Report representation + simple policy -- [ ] `lib/mod/schema.sx` — `report(id, by, about, reason)`, `evidence(id, kind, val)`, - `policy-action(report, action)` predicates as Prolog facts/rules -- [ ] `lib/mod/policy.sx` — rule declarations: `(defrule action :when conditions)` - desugars to Prolog clause -- [ ] `lib/mod/engine.sx` — `(decide report-id)` runs Prolog query, returns first - matching action -- [ ] `lib/mod/api.sx` — `(mod/report by about reason)`, `(mod/decide id)` -- [ ] `lib/mod/tests/decide.sx` — 15+ cases: spam keyword → hide, repeated reports → - escalate, no rule matches → keep -- [ ] `lib/mod/scoreboard.{json,md}` -- [ ] `lib/mod/conformance.sh` +- [x] `lib/mod/schema.sx` — `report(id, by, about)`, `classification(id, kind)`, + `report_count(subject, n)` Prolog facts; keyword classifier derives evidence +- [x] `lib/mod/policy.sx` — `mod/mk-rule` + ordered `mod/default-rules`; conditions + (`:classification`, `:count-at-least`) compile to Prolog goals; `policy_action/3` + clauses, last clause `true` so every report yields at least `:keep` +- [x] `lib/mod/engine.sx` — `(mod/decide-report r reports rules)` queries + `policy_action(Id, Action, Rule)` with `pl-query-one` (clause order = precedence); + returns a decision dict `{:action :rule :report-id :proof}` carrying the why +- [x] `lib/mod/api.sx` — registry + `(mod/report by about reason)`, `(mod/decide id)` +- [x] `lib/mod/tests/decide.sx` — 31 cases: spam/abuse keyword, repeated→escalate, + no-rule→keep, precedence (spam beats repeated), proof shape, registry ids +- [x] `lib/mod/scoreboard.{json,md}` +- [x] `lib/mod/conformance.sh` ## Phase 2 — Evidence + audit trail @@ -105,8 +107,24 @@ lib/mod/fed.sx ## Progress log -(loop fills this in) +- **Phase 1 complete — 31/31.** Report schema, 
keyword classifier, policy DSL, + engine, registry api, conformance harness. Decisions are proofs: each carries + `:rule` (matching clause), `:proof {:rule :conditions :evidence :count}`. + Precedence is Prolog clause order resolved by `pl-query-one`; a trailing + `true`-bodied default rule makes "no rule matched" a real `:keep`, not a query + failure. Evidence (spam/abuse classification) derived in SX and asserted as + `classification/2` facts; repeated-report escalation uses a genuine Prolog + join + arithmetic (`report(Id,_,S), report_count(S,N), N >= 3`). + - **Gotcha (env):** loading the prolog libs strips `includes?` (and other + high-level string prims) from the eval env — only the set the prolog + tokenizer itself uses survives (`slice`, `len`, `nth`, `=`, `join`, + `downcase`, `map`, `reduce`, `append!`). Implemented `mod/str-contains?` over + `slice`/`len` rather than relying on `includes?`. Watch for this in later + phases — stick to the blessed primitive set. + - **Liftable (acl-sx watch):** `mod/join-with`, `mod/str-contains?`, `mod/any?`, + and the rule→clause compilation shape are generic rule-engine plumbing. Do not + extract to `lib/guest/` until both mod-sx and acl-sx are past Phase 2. ## Blockers -(loop fills this in) +(none)