From 9e380fd96edf29b4cdc6e2abf3f390c752326e7f Mon Sep 17 00:00:00 2001 From: giles Date: Mon, 11 May 2026 07:57:01 +0000 Subject: [PATCH] datalog: aggregate validates that agg-var appears in goal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `count(N, Y, p(X))` silently returned `N = 1` because `Y` was never bound by the goal — every match contributed the same unbound symbol which dl-val-member? deduped to a single entry. Similarly: sum(S, Y, p(X)) => raises "expected number, got symbol" findall(L, Y, p(X)) => L = (Y) (a list containing the unbound symbol) count(N, Y, p(X)) => N = 1 (silent garbage) Added a third validator in dl-eval-aggregate: the agg-var must syntactically appear among the goal's variables. Error names the variable and the goal and explains why the result would be meaningless. 1 new test; conformance 263/263. Co-Authored-By: Claude Opus 4.7 (1M context) --- lib/datalog/aggregates.sx | 8 ++++++++ lib/datalog/scoreboard.json | 8 ++++---- lib/datalog/scoreboard.md | 4 ++-- lib/datalog/tests/aggregates.sx | 13 +++++++++++++ plans/datalog-on-sx.md | 11 ++++++++++- 5 files changed, 37 insertions(+), 7 deletions(-) diff --git a/lib/datalog/aggregates.sx b/lib/datalog/aggregates.sx index 4911a330..0e62e536 100644 --- a/lib/datalog/aggregates.sx +++ b/lib/datalog/aggregates.sx @@ -113,6 +113,14 @@ (error (str "datalog aggregate (" op "): third arg must be a positive literal, got " goal))) + ((not (dl-member-string? + (symbol->string agg-var) + (dl-vars-of goal))) + (error (str "datalog aggregate (" op + "): aggregation variable " agg-var + " does not appear in the goal " goal + " — without it every match contributes the same " + "(unbound) value and the result is meaningless"))) (else (let ((vals (list))) (do diff --git a/lib/datalog/scoreboard.json b/lib/datalog/scoreboard.json index 558a8a8e..8352bdbf 100644 --- a/lib/datalog/scoreboard.json +++ b/lib/datalog/scoreboard.json @@ -1,8 +1,8 @@ { "lang": "datalog", - "total_passed": 262, + "total_passed": 263, "total_failed": 0, - "total": 262, + "total": 263, "suites": [ {"name":"tokenize","passed":30,"failed":0,"total":30}, {"name":"parse","passed":22,"failed":0,"total":22}, @@ -11,10 +11,10 @@ {"name":"builtins","passed":23,"failed":0,"total":23}, {"name":"semi_naive","passed":8,"failed":0,"total":8}, {"name":"negation","passed":10,"failed":0,"total":10}, - {"name":"aggregates","passed":22,"failed":0,"total":22}, + {"name":"aggregates","passed":23,"failed":0,"total":23}, {"name":"api","passed":22,"failed":0,"total":22}, {"name":"magic","passed":36,"failed":0,"total":36}, {"name":"demo","passed":21,"failed":0,"total":21} ], - "generated": "2026-05-11T07:50:41+00:00" + "generated": "2026-05-11T07:56:45+00:00" } diff --git a/lib/datalog/scoreboard.md b/lib/datalog/scoreboard.md index 1b0e23de..7bb4ab2b 100644 --- a/lib/datalog/scoreboard.md +++ b/lib/datalog/scoreboard.md @@ -1,6 +1,6 @@ # datalog scoreboard -**262 / 262 passing** (0 failure(s)). +**263 / 263 passing** (0 failure(s)). | Suite | Passed | Total | Status | |-------|--------|-------|--------| @@ -11,7 +11,7 @@ | builtins | 23 | 23 | ok | | semi_naive | 8 | 8 | ok | | negation | 10 | 10 | ok | -| aggregates | 22 | 22 | ok | +| aggregates | 23 | 23 | ok | | api | 22 | 22 | ok | | magic | 36 | 36 | ok | | demo | 21 | 21 | ok | diff --git a/lib/datalog/tests/aggregates.sx b/lib/datalog/tests/aggregates.sx index 45b3ab88..42c687a6 100644 --- a/lib/datalog/tests/aggregates.sx +++ b/lib/datalog/tests/aggregates.sx @@ -256,6 +256,19 @@ (fn () (dl-eval "p(1). q(N) :- count(N, X, 42)." "?- q(N)."))) true) + ;; Aggregate validates that the agg-var (2nd arg) appears in the + ;; goal. Without it every match contributes the same unbound + ;; symbol — count silently returns 1, sum raises a confusing + ;; "expected number" error, etc. Catch the mistake at safety + ;; check time instead. + (dl-at-test! "agg-var must appear in goal" + (dl-at-throws? + (fn () + (dl-eval + "p(1). p(2). c(N) :- count(N, Y, p(X))." + "?- c(N)."))) + true) + ;; Indirect recursion through aggregation also rejected. ;; q -> r (via positive lit), r -> q (via aggregate body). ;; The aggregate edge counts as negation for stratification. diff --git a/plans/datalog-on-sx.md b/plans/datalog-on-sx.md index a245f56d..0082efbd 100644 --- a/plans/datalog-on-sx.md +++ b/plans/datalog-on-sx.md @@ -15,7 +15,7 @@ for rose-ash data (e.g. federation graph, content relationships). ## Status (rolling) -`bash lib/datalog/conformance.sh` → **262/262 across 11 suites** +`bash lib/datalog/conformance.sh` → **263/263 across 11 suites** (tokenize, parse, unify, eval, builtins, semi_naive, negation, aggregates, api, magic, demo). Source is ~3100 LOC, tests ~2900 LOC, public API documented in `lib/datalog/datalog.sx`. @@ -320,6 +320,15 @@ large graphs. _Newest first._ +- 2026-05-11 — Aggregate variable validation: `count(N, Y, p(X))` + silently returned `N = 1` because `Y` was never bound in `p(X)` — + every match contributed the same unbound symbol, which dl-val-member? + deduped to a single entry. Similarly `sum(S, Y, p(X))` raised a + confusing "expected number" error from the underlying `+`. Added + a third validator in `dl-eval-aggregate`: the agg-var must appear + in the goal literal. Error names the variable and the goal and + explains the consequence. 1 new test; 263/263. + - 2026-05-11 — `dl-retract!` was silently destroying EDB facts in "mixed" relations (those with BOTH user-asserted facts AND a rule defining the same head). The retract pass wiped every rule-head