Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 23s
Quoted atoms with uppercase- or underscore-leading names were
misclassified as variables. `p('Hello World').` flowed through the
tokenizer's "atom" branch and through the parser's string->symbol,
producing a symbol named "Hello World". dl-var? inspects the first
character — "H" is uppercase, so the fact was rejected as non-ground
("expected ground literal").
Tokenizer now emits "string" for any '...' quoted form. Quoted atoms
become opaque string constants — matching how Datalog idiomatically
treats them, and avoiding a per-symbol "quoted" marker that would
have rippled through unification and dl-var?. The trade-off is that
'a' and a are no longer the same value (string vs symbol); for
Datalog this is the safer default.
Updated the existing "quoted atom" tokenize test, added a regression
case for an uppercase-named quoted atom, and a parse-level test that
verifies the AST. Conformance 269/269.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
190 lines
5.8 KiB
Plaintext
190 lines
5.8 KiB
Plaintext
;; lib/datalog/tests/tokenize.sx — tokenizer unit tests
|
|
;;
|
|
;; Run via: bash lib/datalog/conformance.sh
|
|
;; Or: (load "lib/datalog/tokenizer.sx") (load "lib/datalog/tests/tokenize.sx")
|
|
;; (dl-tokenize-tests-run!)
|
|
|
|
;; Mutable suite state. dl-tk-test! updates these as assertions run and
;; dl-tokenize-tests-run! resets them before every run.

;; Count of assertions whose actual value equalled the expected value.
(define dl-tk-pass 0)

;; Count of assertions whose actual value differed from the expected value.
(define dl-tk-fail 0)

;; One human-readable message per failed assertion, in execution order.
(define dl-tk-failures (list))
|
|
|
|
;; Record the outcome of a single assertion.
;;   name     - label used in the failure message
;;   got      - value actually produced
;;   expected - value the test requires
;; When `got` equals `expected` the pass counter is bumped; otherwise the
;; fail counter is bumped and a message naming both values is appended to
;; dl-tk-failures.
(define dl-tk-test!
  (fn (name got expected)
    (if (= got expected)
      (set! dl-tk-pass (+ dl-tk-pass 1))
      (do
        (set! dl-tk-fail (+ dl-tk-fail 1))
        (let ((message (str name "\n expected: " expected "\n got: " got)))
          (append! dl-tk-failures message))))))
|
|
|
|
;; Project a token list onto the :type field of each token.
(define dl-tk-types
  (fn (toks)
    (map (fn (tok) (get tok :type)) toks)))
|
|
;; Project a token list onto the :value field of each token.
(define dl-tk-values
  (fn (toks)
    (map (fn (tok) (get tok :value)) toks)))
|
|
|
|
;; Run every tokenizer assertion in order, recording each outcome through
;; dl-tk-test!. Takes no arguments. Callers are expected to reset the
;; counters first (dl-tokenize-tests-run! does so).
(define dl-tk-run-all!
  (fn ()
    (let
      ;; True when tokenizing `src` raises anything at all, false otherwise.
      ((raises?
         (fn (src)
           (let ((threw false))
             (do
               (guard (e (#t (set! threw true)))
                 (dl-tokenize src))
               threw)))))
      (do
        ;; --- basic token kinds ---
        (dl-tk-test! "empty" (dl-tk-types (dl-tokenize "")) (list "eof"))
        (dl-tk-test! "atom dot"
          (dl-tk-types (dl-tokenize "foo."))
          (list "atom" "punct" "eof"))
        (dl-tk-test! "atom dot value"
          (dl-tk-values (dl-tokenize "foo."))
          (list "foo" "." nil))
        (dl-tk-test! "var"
          (dl-tk-types (dl-tokenize "X."))
          (list "var" "punct" "eof"))
        (dl-tk-test! "underscore var"
          (dl-tk-types (dl-tokenize "_x."))
          (list "var" "punct" "eof"))
        (dl-tk-test! "integer"
          (dl-tk-values (dl-tokenize "42"))
          (list 42 nil))
        (dl-tk-test! "decimal"
          (dl-tk-values (dl-tokenize "3.14"))
          (list 3.14 nil))
        (dl-tk-test! "string"
          (dl-tk-values (dl-tokenize "\"hello\""))
          (list "hello" nil))

        ;; Quoted 'atoms' tokenize as strings — see the type-table
        ;; comment in lib/datalog/tokenizer.sx for the rationale.
        (dl-tk-test! "quoted atom as string"
          (dl-tk-types (dl-tokenize "'two words'"))
          (list "string" "eof"))
        (dl-tk-test! "quoted atom value"
          (dl-tk-values (dl-tokenize "'two words'"))
          (list "two words" nil))

        ;; A quoted atom whose name would otherwise be a variable
        ;; (uppercase / leading underscore) is now safely a string —
        ;; this was the bug that motivated the type change.
        (dl-tk-test! "quoted Uppercase as string"
          (dl-tk-types (dl-tokenize "'Hello'"))
          (list "string" "eof"))

        ;; --- operators and punctuation ---
        (dl-tk-test! ":-" (dl-tk-values (dl-tokenize ":-")) (list ":-" nil))
        (dl-tk-test! "?-" (dl-tk-values (dl-tokenize "?-")) (list "?-" nil))
        (dl-tk-test! "<=" (dl-tk-values (dl-tokenize "<=")) (list "<=" nil))
        (dl-tk-test! ">=" (dl-tk-values (dl-tokenize ">=")) (list ">=" nil))
        (dl-tk-test! "!=" (dl-tk-values (dl-tokenize "!=")) (list "!=" nil))
        (dl-tk-test! "single op values"
          (dl-tk-values (dl-tokenize "< > = + - * /"))
          (list "<" ">" "=" "+" "-" "*" "/" nil))
        (dl-tk-test! "single op types"
          (dl-tk-types (dl-tokenize "< > = + - * /"))
          (list "op" "op" "op" "op" "op" "op" "op" "eof"))
        (dl-tk-test! "punct"
          (dl-tk-values (dl-tokenize "( ) , ."))
          (list "(" ")" "," "." nil))

        ;; --- whole clauses ---
        (dl-tk-test! "fact tokens"
          (dl-tk-types (dl-tokenize "parent(tom, bob)."))
          (list "atom" "punct" "atom" "punct" "atom" "punct" "punct" "eof"))
        (dl-tk-test! "rule shape"
          (dl-tk-types (dl-tokenize "p(X) :- q(X)."))
          (list
            "atom" "punct" "var" "punct"
            "op"
            "atom" "punct" "var" "punct"
            "punct"
            "eof"))
        (dl-tk-test! "comparison literal"
          (dl-tk-values (dl-tokenize "<(X, 5)"))
          (list "<" "(" "X" "," 5 ")" nil))
        (dl-tk-test! "is form"
          (dl-tk-values (dl-tokenize "is(Y, +(X, 1))"))
          (list "is" "(" "Y" "," "+" "(" "X" "," 1 ")" ")" nil))

        ;; --- comments are skipped ---
        (dl-tk-test! "line comment"
          (dl-tk-types (dl-tokenize "% comment line\nfoo."))
          (list "atom" "punct" "eof"))
        (dl-tk-test! "block comment"
          (dl-tk-types (dl-tokenize "/* a\nb */ x."))
          (list "atom" "punct" "eof"))

        ;; Unexpected characters surface at tokenize time rather
        ;; than being silently consumed (previously `?(X)` parsed as
        ;; if the leading `?` weren't there).
        (dl-tk-test! "unexpected char raises" (raises? "?(X)") true)

        ;; Unterminated string / quoted-atom must raise.
        (dl-tk-test! "unterminated string raises" (raises? "\"unclosed") true)
        (dl-tk-test! "unterminated quoted atom raises" (raises? "'unclosed") true)

        ;; Unterminated block comment must raise — previously it was
        ;; silently consumed to EOF.
        (dl-tk-test! "unterminated block comment raises"
          (raises? "/* unclosed comment")
          true)

        ;; --- layout ---
        (dl-tk-test! "whitespace"
          (dl-tk-types (dl-tokenize " foo ,\t bar ."))
          (list "atom" "punct" "atom" "punct" "eof"))
        (dl-tk-test! "positions"
          (map (fn (tok) (get tok :pos)) (dl-tokenize "foo bar"))
          (list 0 4 7))))))
|
|
|
|
;; Public entry point for the tokenizer suite.
;; Clears the pass/fail counters and the failure list, runs every
;; assertion via dl-tk-run-all!, and returns a summary dict with the keys
;; :failures (list of messages), :total, :passed and :failed.
(define dl-tokenize-tests-run!
  (fn ()
    (do
      ;; Start from a clean slate so repeated runs do not accumulate.
      (set! dl-tk-pass 0)
      (set! dl-tk-fail 0)
      (set! dl-tk-failures (list))
      (dl-tk-run-all!)
      {:failures dl-tk-failures
       :total (+ dl-tk-pass dl-tk-fail)
       :passed dl-tk-pass
       :failed dl-tk-fail})))
|