diff --git a/lib/datalog/scoreboard.json b/lib/datalog/scoreboard.json index 9c394d50..40f37987 100644 --- a/lib/datalog/scoreboard.json +++ b/lib/datalog/scoreboard.json @@ -1,11 +1,11 @@ { "lang": "datalog", - "total_passed": 267, + "total_passed": 269, "total_failed": 0, - "total": 267, + "total": 269, "suites": [ - {"name":"tokenize","passed":30,"failed":0,"total":30}, - {"name":"parse","passed":22,"failed":0,"total":22}, + {"name":"tokenize","passed":31,"failed":0,"total":31}, + {"name":"parse","passed":23,"failed":0,"total":23}, {"name":"unify","passed":29,"failed":0,"total":29}, {"name":"eval","passed":40,"failed":0,"total":40}, {"name":"builtins","passed":26,"failed":0,"total":26}, @@ -16,5 +16,5 @@ {"name":"magic","passed":36,"failed":0,"total":36}, {"name":"demo","passed":21,"failed":0,"total":21} ], - "generated": "2026-05-11T08:07:23+00:00" + "generated": "2026-05-11T08:39:03+00:00" } diff --git a/lib/datalog/scoreboard.md b/lib/datalog/scoreboard.md index 89a66d4e..174fc06d 100644 --- a/lib/datalog/scoreboard.md +++ b/lib/datalog/scoreboard.md @@ -1,11 +1,11 @@ # datalog scoreboard -**267 / 267 passing** (0 failure(s)). +**269 / 269 passing** (0 failure(s)). | Suite | Passed | Total | Status | |-------|--------|-------|--------| -| tokenize | 30 | 30 | ok | -| parse | 22 | 22 | ok | +| tokenize | 31 | 31 | ok | +| parse | 23 | 23 | ok | | unify | 29 | 29 | ok | | eval | 40 | 40 | ok | | builtins | 26 | 26 | ok | diff --git a/lib/datalog/tests/parse.sx b/lib/datalog/tests/parse.sx index 9d0f7201..6fc81b27 100644 --- a/lib/datalog/tests/parse.sx +++ b/lib/datalog/tests/parse.sx @@ -106,6 +106,13 @@ "string arg" (dl-parse "label(x, \"hi\").") (list {:body (list) :head (list (quote label) (quote x) "hi")})) + ;; Quoted 'atoms' parse as strings — an uppercase-starting name + ;; in quotes used to misclassify as a variable and reject the + ;; fact as non-ground. + (dl-pt-test! 
+ "quoted atom arg parses as string" + (dl-parse "p('Hello World').") + (list {:body (list) :head (list (quote p) "Hello World")})) (dl-pt-test! "comparison literal" (dl-parse "p(X) :- <(X, 5).") diff --git a/lib/datalog/tests/tokenize.sx b/lib/datalog/tests/tokenize.sx index 51470a44..6c7b94a9 100644 --- a/lib/datalog/tests/tokenize.sx +++ b/lib/datalog/tests/tokenize.sx @@ -58,14 +58,23 @@ "string" (dl-tk-values (dl-tokenize "\"hello\"")) (list "hello" nil)) + ;; Quoted 'atoms' tokenize as strings — see the type-table + ;; comment in lib/datalog/tokenizer.sx for the rationale. (dl-tk-test! - "quoted atom" + "quoted atom as string" (dl-tk-types (dl-tokenize "'two words'")) - (list "atom" "eof")) + (list "string" "eof")) (dl-tk-test! "quoted atom value" (dl-tk-values (dl-tokenize "'two words'")) (list "two words" nil)) + ;; A quoted atom whose name would otherwise be a variable + ;; (uppercase / leading underscore) is now safely a string — + ;; this was the bug that motivated the type change. + (dl-tk-test! + "quoted Uppercase as string" + (dl-tk-types (dl-tokenize "'Hello'")) + (list "string" "eof")) (dl-tk-test! ":-" (dl-tk-values (dl-tokenize ":-")) (list ":-" nil)) (dl-tk-test! "?-" (dl-tk-values (dl-tokenize "?-")) (list "?-" nil)) (dl-tk-test! "<=" (dl-tk-values (dl-tokenize "<=")) (list "<=" nil)) diff --git a/lib/datalog/tokenizer.sx b/lib/datalog/tokenizer.sx index a10a7ff7..cf0aa730 100644 --- a/lib/datalog/tokenizer.sx +++ b/lib/datalog/tokenizer.sx @@ -2,10 +2,13 @@ ;; ;; Tokens: {:type T :value V :pos P} ;; Types: -;; "atom" — lowercase-start ident or quoted 'atom' +;; "atom" — lowercase-start bare identifier ;; "var" — uppercase-start or _-start ident (value is the name) ;; "number" — numeric literal (decoded to number) -;; "string" — "..." string literal +;; "string" — "..." 
string literal OR quoted 'atom' (treated as a +;; string value to avoid the var-vs-atom ambiguity that +;; would arise from a quoted atom whose name starts with +;; an uppercase letter or underscore) ;; "punct" — ( ) , . ;; "op" — :- ?- <= >= != < > = + - * / ;; "eof" @@ -192,7 +195,11 @@ (dl-emit! "number" (read-number start) start) (scan!))) ((= ch "'") - (do (dl-emit! "atom" (read-quoted "'") start) (scan!))) + ;; Quoted 'atoms' tokenize as strings so a name + ;; like 'Hello World' doesn't get misclassified + ;; as a variable by dl-var? (which inspects the + ;; symbol's first character). + (do (dl-emit! "string" (read-quoted "'") start) (scan!))) ((= ch "\"") (do (dl-emit! "string" (read-quoted "\"") start) (scan!))) ((dl-lower? ch) diff --git a/plans/datalog-on-sx.md b/plans/datalog-on-sx.md index a07670c2..29572e5c 100644 --- a/plans/datalog-on-sx.md +++ b/plans/datalog-on-sx.md @@ -15,7 +15,7 @@ for rose-ash data (e.g. federation graph, content relationships). ## Status (rolling) -`bash lib/datalog/conformance.sh` → **267/267 across 11 suites** +`bash lib/datalog/conformance.sh` → **269/269 across 11 suites** (tokenize, parse, unify, eval, builtins, semi_naive, negation, aggregates, api, magic, demo). Source is ~3100 LOC, tests ~2900 LOC, public API documented in `lib/datalog/datalog.sx`. @@ -320,6 +320,18 @@ large graphs. _Newest first._ +- 2026-05-11 — Quoted atoms with uppercase-or-underscore-leading + names were misclassified as variables. `p('Hello World').` ran + through the tokenizer's `"atom"` branch and through the parser's + `string->symbol`, producing a symbol named "Hello World". dl-var? + checks the first character — "H" is uppercase, so the fact was + rejected as non-ground. Fix: tokenizer emits `"string"` for any + `'...'` quoted form, so quoted atoms become opaque string constants + (matching how Datalog idiomatically treats them — the alternative + was a per-symbol "quoted" marker which would have rippled through + unification and dl-var?). 
Updated the existing tokenize test and + added one for `'Hello'`; also added a parse-level regression. 269/269. + - 2026-05-11 — Type-mixed comparisons were silently inconsistent: `<(X, 5)` with `X` bound to a string returned `()` (no result, no error), while `X` bound to a symbol raised "Expected number, got