From ba60db2eef9dfb18edd8f0c5b6bab8e265cbb259 Mon Sep 17 00:00:00 2001 From: giles Date: Mon, 11 May 2026 08:04:03 +0000 Subject: [PATCH] datalog: reject malformed dict body literals MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A dict in a rule body that isn't `{:neg }` (the only recognised dict shape) used to silently fall through every dispatch clause in dl-rule-check-safety, contributing zero bound variables. The user would then see a confusing "head variable(s) X do not appear in any positive body literal" pointing at the head — not at the actual bug in the body. Typos like `{:negs ...}` are the typical trigger. dl-process-lit! now flags both: - a dict that lacks :neg - a bare number / string / symbol used as a body lit with a clear error naming the offending literal. 1 new regression test; conformance 265/265. Co-Authored-By: Claude Opus 4.7 (1M context) --- lib/datalog/builtins.sx | 22 +++++++++++++++++++++- lib/datalog/scoreboard.json | 8 ++++---- lib/datalog/scoreboard.md | 4 ++-- lib/datalog/tests/eval.sx | 13 +++++++++++++ plans/datalog-on-sx.md | 11 ++++++++++- 5 files changed, 50 insertions(+), 8 deletions(-) diff --git a/lib/datalog/builtins.sx b/lib/datalog/builtins.sx index fad58d5a..720a8025 100644 --- a/lib/datalog/builtins.sx +++ b/lib/datalog/builtins.sx @@ -301,6 +301,18 @@ (cond ((and (dict? lit) (has-key? lit :neg)) (dl-process-neg! lit)) + ;; A bare dict that is not a recognised negation is + ;; almost certainly a typo (e.g. `{:negs ...}` instead + ;; of `{:neg ...}`). Without this guard the dict would + ;; silently fall through every clause; the head safety + ;; check would then flag the head variables as unbound + ;; even though the real bug is the malformed body lit. + ((dict? lit) + (set! err + (str "body literal is a dict but lacks :neg — " + "the only dict-shaped body lit recognised is " + "{:neg } for stratified " + "negation, got " lit))) ((dl-aggregate? lit) (dl-process-agg! lit)) ((dl-eq? lit) (dl-process-eq! lit)) ((dl-is? lit) (dl-process-is! lit)) @@ -315,7 +327,15 @@ "syntax; nested `not(...)` is not supported " "(use stratified negation via an " "intermediate relation)"))) - (else (dl-add-bound! (dl-vars-of lit)))))))))) + (else (dl-add-bound! (dl-vars-of lit)))))) + (else + ;; Anything that's not a dict, not a list, or an + ;; empty list. Numbers / strings / symbols as body + ;; lits don't make sense — surface the type. + (set! err + (str "body literal must be a positive lit, " + "built-in, aggregate, or {:neg ...} dict, " + "got " lit))))))) (for-each dl-process-lit! body) (when (nil? err) diff --git a/lib/datalog/scoreboard.json b/lib/datalog/scoreboard.json index 0049c748..8d8671c0 100644 --- a/lib/datalog/scoreboard.json +++ b/lib/datalog/scoreboard.json @@ -1,13 +1,13 @@ { "lang": "datalog", - "total_passed": 264, + "total_passed": 265, "total_failed": 0, - "total": 264, + "total": 265, "suites": [ {"name":"tokenize","passed":30,"failed":0,"total":30}, {"name":"parse","passed":22,"failed":0,"total":22}, {"name":"unify","passed":29,"failed":0,"total":29}, - {"name":"eval","passed":39,"failed":0,"total":39}, + {"name":"eval","passed":40,"failed":0,"total":40}, {"name":"builtins","passed":24,"failed":0,"total":24}, {"name":"semi_naive","passed":8,"failed":0,"total":8}, {"name":"negation","passed":10,"failed":0,"total":10}, @@ -16,5 +16,5 @@ {"name":"magic","passed":36,"failed":0,"total":36}, {"name":"demo","passed":21,"failed":0,"total":21} ], - "generated": "2026-05-11T07:59:10+00:00" + "generated": "2026-05-11T08:03:45+00:00" } diff --git a/lib/datalog/scoreboard.md b/lib/datalog/scoreboard.md index a60c5c5f..21c23323 100644 --- a/lib/datalog/scoreboard.md +++ b/lib/datalog/scoreboard.md @@ -1,13 +1,13 @@ # datalog scoreboard -**264 / 264 passing** (0 failure(s)). +**265 / 265 passing** (0 failure(s)). | Suite | Passed | Total | Status | |-------|--------|-------|--------| | tokenize | 30 | 30 | ok | | parse | 22 | 22 | ok | | unify | 29 | 29 | ok | -| eval | 39 | 39 | ok | +| eval | 40 | 40 | ok | | builtins | 24 | 24 | ok | | semi_naive | 8 | 8 | ok | | negation | 10 | 10 | ok | diff --git a/lib/datalog/tests/eval.sx b/lib/datalog/tests/eval.sx index 9b0d6cd8..f859c2b2 100644 --- a/lib/datalog/tests/eval.sx +++ b/lib/datalog/tests/eval.sx @@ -221,6 +221,19 @@ "banned(a). u(a). vip(X) :- u(X), not(not(banned(X)))."))) true) + ;; A dict body literal that isn't `{:neg ...}` is almost always a + ;; typo — it would otherwise silently fall through to a confusing + ;; head-var-unbound safety error. Now caught with a clear message. + (dl-et-test! + "dict body lit without :neg rejected" + (dl-et-throws? + (fn () + (let ((db (dl-make-db))) + (dl-add-rule! db + {:head (list (quote p) (quote X)) + :body (list {:weird "stuff"})})))) + true) + (dl-et-test! "unsafe head var" (dl-et-throws? (fn () (dl-program "p(X, Y) :- q(X)."))) diff --git a/plans/datalog-on-sx.md b/plans/datalog-on-sx.md index ac3eb4f7..44f2cd32 100644 --- a/plans/datalog-on-sx.md +++ b/plans/datalog-on-sx.md @@ -15,7 +15,7 @@ for rose-ash data (e.g. federation graph, content relationships). ## Status (rolling) -`bash lib/datalog/conformance.sh` → **264/264 across 11 suites** +`bash lib/datalog/conformance.sh` → **265/265 across 11 suites** (tokenize, parse, unify, eval, builtins, semi_naive, negation, aggregates, api, magic, demo). Source is ~3100 LOC, tests ~2900 LOC, public API documented in `lib/datalog/datalog.sx`. @@ -320,6 +320,15 @@ large graphs. _Newest first._ +- 2026-05-11 — Body literal shape validation in + `dl-rule-check-safety`: a dict that isn't `{:neg ...}` (e.g. typo'd + `{:negs ...}`) used to silently fall through every dispatch clause, + contributing zero bound vars; the user would then see a confusing + "head var X unbound" error pointing at the head, not the malformed + body. Same for body lits that are bare numbers / strings / symbols. + Both shapes now raise a clear error naming the offending lit. 1 new + regression test; 265/265. + - 2026-05-11 — Division by zero in `is` silently produced IEEE infinity instead of raising. `is(R, /(X, 0))` returned `R = inf`, which then flowed through comparisons and aggregations to produce