datalog: tokenizer + 26 tests (Phase 1)
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 54s
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 54s
Tokens {:type :value :pos} for atoms, vars, numbers, strings, punct, ops.
Operators :- ?- <= >= != < > = + - * /. Comments % and /* */.
This commit is contained in:
139
lib/datalog/tests/tokenize.sx
Normal file
139
lib/datalog/tests/tokenize.sx
Normal file
@@ -0,0 +1,139 @@
|
|||||||
|
;; lib/datalog/tests/tokenize.sx — tokenizer unit tests
|
||||||
|
;;
|
||||||
|
;; Run via: bash lib/datalog/conformance.sh
|
||||||
|
;; Or: (load "lib/datalog/tokenizer.sx") (load "lib/datalog/tests/tokenize.sx")
|
||||||
|
;; (dl-tokenize-tests-run!)
|
||||||
|
|
||||||
|
(define dl-tk-pass 0)
|
||||||
|
(define dl-tk-fail 0)
|
||||||
|
(define dl-tk-failures (list))
|
||||||
|
|
||||||
|
(define
|
||||||
|
dl-tk-test!
|
||||||
|
(fn
|
||||||
|
(name got expected)
|
||||||
|
(if
|
||||||
|
(= got expected)
|
||||||
|
(set! dl-tk-pass (+ dl-tk-pass 1))
|
||||||
|
(do
|
||||||
|
(set! dl-tk-fail (+ dl-tk-fail 1))
|
||||||
|
(append!
|
||||||
|
dl-tk-failures
|
||||||
|
(str name "\n expected: " expected "\n got: " got))))))
|
||||||
|
|
||||||
|
(define dl-tk-types (fn (toks) (map (fn (t) (get t :type)) toks)))
|
||||||
|
(define dl-tk-values (fn (toks) (map (fn (t) (get t :value)) toks)))
|
||||||
|
|
||||||
|
(define
|
||||||
|
dl-tk-run-all!
|
||||||
|
(fn
|
||||||
|
()
|
||||||
|
(do
|
||||||
|
(dl-tk-test! "empty" (dl-tk-types (dl-tokenize "")) (list "eof"))
|
||||||
|
(dl-tk-test!
|
||||||
|
"atom dot"
|
||||||
|
(dl-tk-types (dl-tokenize "foo."))
|
||||||
|
(list "atom" "punct" "eof"))
|
||||||
|
(dl-tk-test!
|
||||||
|
"atom dot value"
|
||||||
|
(dl-tk-values (dl-tokenize "foo."))
|
||||||
|
(list "foo" "." nil))
|
||||||
|
(dl-tk-test!
|
||||||
|
"var"
|
||||||
|
(dl-tk-types (dl-tokenize "X."))
|
||||||
|
(list "var" "punct" "eof"))
|
||||||
|
(dl-tk-test!
|
||||||
|
"underscore var"
|
||||||
|
(dl-tk-types (dl-tokenize "_x."))
|
||||||
|
(list "var" "punct" "eof"))
|
||||||
|
(dl-tk-test!
|
||||||
|
"integer"
|
||||||
|
(dl-tk-values (dl-tokenize "42"))
|
||||||
|
(list 42 nil))
|
||||||
|
(dl-tk-test!
|
||||||
|
"decimal"
|
||||||
|
(dl-tk-values (dl-tokenize "3.14"))
|
||||||
|
(list 3.14 nil))
|
||||||
|
(dl-tk-test!
|
||||||
|
"string"
|
||||||
|
(dl-tk-values (dl-tokenize "\"hello\""))
|
||||||
|
(list "hello" nil))
|
||||||
|
(dl-tk-test!
|
||||||
|
"quoted atom"
|
||||||
|
(dl-tk-types (dl-tokenize "'two words'"))
|
||||||
|
(list "atom" "eof"))
|
||||||
|
(dl-tk-test!
|
||||||
|
"quoted atom value"
|
||||||
|
(dl-tk-values (dl-tokenize "'two words'"))
|
||||||
|
(list "two words" nil))
|
||||||
|
(dl-tk-test! ":-" (dl-tk-values (dl-tokenize ":-")) (list ":-" nil))
|
||||||
|
(dl-tk-test! "?-" (dl-tk-values (dl-tokenize "?-")) (list "?-" nil))
|
||||||
|
(dl-tk-test! "<=" (dl-tk-values (dl-tokenize "<=")) (list "<=" nil))
|
||||||
|
(dl-tk-test! ">=" (dl-tk-values (dl-tokenize ">=")) (list ">=" nil))
|
||||||
|
(dl-tk-test! "!=" (dl-tk-values (dl-tokenize "!=")) (list "!=" nil))
|
||||||
|
(dl-tk-test!
|
||||||
|
"single op values"
|
||||||
|
(dl-tk-values (dl-tokenize "< > = + - * /"))
|
||||||
|
(list "<" ">" "=" "+" "-" "*" "/" nil))
|
||||||
|
(dl-tk-test!
|
||||||
|
"single op types"
|
||||||
|
(dl-tk-types (dl-tokenize "< > = + - * /"))
|
||||||
|
(list "op" "op" "op" "op" "op" "op" "op" "eof"))
|
||||||
|
(dl-tk-test!
|
||||||
|
"punct"
|
||||||
|
(dl-tk-values (dl-tokenize "( ) , ."))
|
||||||
|
(list "(" ")" "," "." nil))
|
||||||
|
(dl-tk-test!
|
||||||
|
"fact tokens"
|
||||||
|
(dl-tk-types (dl-tokenize "parent(tom, bob)."))
|
||||||
|
(list "atom" "punct" "atom" "punct" "atom" "punct" "punct" "eof"))
|
||||||
|
(dl-tk-test!
|
||||||
|
"rule shape"
|
||||||
|
(dl-tk-types (dl-tokenize "p(X) :- q(X)."))
|
||||||
|
(list
|
||||||
|
"atom"
|
||||||
|
"punct"
|
||||||
|
"var"
|
||||||
|
"punct"
|
||||||
|
"op"
|
||||||
|
"atom"
|
||||||
|
"punct"
|
||||||
|
"var"
|
||||||
|
"punct"
|
||||||
|
"punct"
|
||||||
|
"eof"))
|
||||||
|
(dl-tk-test!
|
||||||
|
"comparison literal"
|
||||||
|
(dl-tk-values (dl-tokenize "<(X, 5)"))
|
||||||
|
(list "<" "(" "X" "," 5 ")" nil))
|
||||||
|
(dl-tk-test!
|
||||||
|
"is form"
|
||||||
|
(dl-tk-values (dl-tokenize "is(Y, +(X, 1))"))
|
||||||
|
(list "is" "(" "Y" "," "+" "(" "X" "," 1 ")" ")" nil))
|
||||||
|
(dl-tk-test!
|
||||||
|
"line comment"
|
||||||
|
(dl-tk-types (dl-tokenize "% comment line\nfoo."))
|
||||||
|
(list "atom" "punct" "eof"))
|
||||||
|
(dl-tk-test!
|
||||||
|
"block comment"
|
||||||
|
(dl-tk-types (dl-tokenize "/* a\nb */ x."))
|
||||||
|
(list "atom" "punct" "eof"))
|
||||||
|
(dl-tk-test!
|
||||||
|
"whitespace"
|
||||||
|
(dl-tk-types (dl-tokenize " foo ,\t bar ."))
|
||||||
|
(list "atom" "punct" "atom" "punct" "eof"))
|
||||||
|
(dl-tk-test!
|
||||||
|
"positions"
|
||||||
|
(map (fn (t) (get t :pos)) (dl-tokenize "foo bar"))
|
||||||
|
(list 0 4 7)))))
|
||||||
|
|
||||||
|
(define
|
||||||
|
dl-tokenize-tests-run!
|
||||||
|
(fn
|
||||||
|
()
|
||||||
|
(do
|
||||||
|
(set! dl-tk-pass 0)
|
||||||
|
(set! dl-tk-fail 0)
|
||||||
|
(set! dl-tk-failures (list))
|
||||||
|
(dl-tk-run-all!)
|
||||||
|
{:failures dl-tk-failures :total (+ dl-tk-pass dl-tk-fail) :passed dl-tk-pass :failed dl-tk-fail})))
|
||||||
254
lib/datalog/tokenizer.sx
Normal file
254
lib/datalog/tokenizer.sx
Normal file
@@ -0,0 +1,254 @@
|
|||||||
|
;; lib/datalog/tokenizer.sx — Datalog source → token stream
|
||||||
|
;;
|
||||||
|
;; Tokens: {:type T :value V :pos P}
|
||||||
|
;; Types:
|
||||||
|
;; "atom" — lowercase-start ident or quoted 'atom'
|
||||||
|
;; "var" — uppercase-start or _-start ident (value is the name)
|
||||||
|
;; "number" — numeric literal (decoded to number)
|
||||||
|
;; "string" — "..." string literal
|
||||||
|
;; "punct" — ( ) , .
|
||||||
|
;; "op" — :- ?- <= >= != < > = + - * /
|
||||||
|
;; "eof"
|
||||||
|
;;
|
||||||
|
;; Datalog has no function symbols in arg position; the parser still
|
||||||
|
;; accepts nested compounds for arithmetic ((is X (+ A B))) but safety
|
||||||
|
;; analysis rejects non-arithmetic nesting at rule-load time.
|
||||||
|
|
||||||
|
(define dl-make-token (fn (type value pos) {:type type :value value :pos pos}))
|
||||||
|
|
||||||
|
(define dl-digit? (fn (c) (and (>= c "0") (<= c "9"))))
|
||||||
|
(define dl-lower? (fn (c) (and (>= c "a") (<= c "z"))))
|
||||||
|
(define dl-upper? (fn (c) (and (>= c "A") (<= c "Z"))))
|
||||||
|
|
||||||
|
(define
|
||||||
|
dl-ident-char?
|
||||||
|
(fn (c) (or (dl-lower? c) (dl-upper? c) (dl-digit? c) (= c "_"))))
|
||||||
|
|
||||||
|
(define dl-ws? (fn (c) (or (= c " ") (= c "\t") (= c "\n") (= c "\r"))))
|
||||||
|
|
||||||
|
(define
|
||||||
|
dl-tokenize
|
||||||
|
(fn
|
||||||
|
(src)
|
||||||
|
(let
|
||||||
|
((tokens (list)) (pos 0) (src-len (len src)))
|
||||||
|
(define
|
||||||
|
dl-peek
|
||||||
|
(fn
|
||||||
|
(offset)
|
||||||
|
(if (< (+ pos offset) src-len) (nth src (+ pos offset)) nil)))
|
||||||
|
(define cur (fn () (dl-peek 0)))
|
||||||
|
(define advance! (fn (n) (set! pos (+ pos n))))
|
||||||
|
(define
|
||||||
|
at?
|
||||||
|
(fn
|
||||||
|
(s)
|
||||||
|
(let
|
||||||
|
((sl (len s)))
|
||||||
|
(and (<= (+ pos sl) src-len) (= (slice src pos (+ pos sl)) s)))))
|
||||||
|
(define
|
||||||
|
dl-emit!
|
||||||
|
(fn
|
||||||
|
(type value start)
|
||||||
|
(append! tokens (dl-make-token type value start))))
|
||||||
|
(define
|
||||||
|
skip-line-comment!
|
||||||
|
(fn
|
||||||
|
()
|
||||||
|
(when
|
||||||
|
(and (< pos src-len) (not (= (cur) "\n")))
|
||||||
|
(do (advance! 1) (skip-line-comment!)))))
|
||||||
|
(define
|
||||||
|
skip-block-comment!
|
||||||
|
(fn
|
||||||
|
()
|
||||||
|
(cond
|
||||||
|
((>= pos src-len) nil)
|
||||||
|
((and (= (cur) "*") (< (+ pos 1) src-len) (= (dl-peek 1) "/"))
|
||||||
|
(advance! 2))
|
||||||
|
(else (do (advance! 1) (skip-block-comment!))))))
|
||||||
|
(define
|
||||||
|
skip-ws!
|
||||||
|
(fn
|
||||||
|
()
|
||||||
|
(cond
|
||||||
|
((>= pos src-len) nil)
|
||||||
|
((dl-ws? (cur)) (do (advance! 1) (skip-ws!)))
|
||||||
|
((= (cur) "%")
|
||||||
|
(do (advance! 1) (skip-line-comment!) (skip-ws!)))
|
||||||
|
((and (= (cur) "/") (< (+ pos 1) src-len) (= (dl-peek 1) "*"))
|
||||||
|
(do (advance! 2) (skip-block-comment!) (skip-ws!)))
|
||||||
|
(else nil))))
|
||||||
|
(define
|
||||||
|
read-ident
|
||||||
|
(fn
|
||||||
|
(start)
|
||||||
|
(do
|
||||||
|
(when
|
||||||
|
(and (< pos src-len) (dl-ident-char? (cur)))
|
||||||
|
(do (advance! 1) (read-ident start)))
|
||||||
|
(slice src start pos))))
|
||||||
|
(define
|
||||||
|
read-decimal-digits!
|
||||||
|
(fn
|
||||||
|
()
|
||||||
|
(when
|
||||||
|
(and (< pos src-len) (dl-digit? (cur)))
|
||||||
|
(do (advance! 1) (read-decimal-digits!)))))
|
||||||
|
(define
|
||||||
|
read-number
|
||||||
|
(fn
|
||||||
|
(start)
|
||||||
|
(do
|
||||||
|
(read-decimal-digits!)
|
||||||
|
(when
|
||||||
|
(and
|
||||||
|
(< pos src-len)
|
||||||
|
(= (cur) ".")
|
||||||
|
(< (+ pos 1) src-len)
|
||||||
|
(dl-digit? (dl-peek 1)))
|
||||||
|
(do (advance! 1) (read-decimal-digits!)))
|
||||||
|
(parse-number (slice src start pos)))))
|
||||||
|
(define
|
||||||
|
read-quoted
|
||||||
|
(fn
|
||||||
|
(quote-char)
|
||||||
|
(let
|
||||||
|
((chars (list)))
|
||||||
|
(advance! 1)
|
||||||
|
(define
|
||||||
|
loop
|
||||||
|
(fn
|
||||||
|
()
|
||||||
|
(cond
|
||||||
|
((>= pos src-len) nil)
|
||||||
|
((= (cur) "\\")
|
||||||
|
(do
|
||||||
|
(advance! 1)
|
||||||
|
(when
|
||||||
|
(< pos src-len)
|
||||||
|
(let
|
||||||
|
((ch (cur)))
|
||||||
|
(do
|
||||||
|
(cond
|
||||||
|
((= ch "n") (append! chars "\n"))
|
||||||
|
((= ch "t") (append! chars "\t"))
|
||||||
|
((= ch "r") (append! chars "\r"))
|
||||||
|
((= ch "\\") (append! chars "\\"))
|
||||||
|
((= ch "'") (append! chars "'"))
|
||||||
|
((= ch "\"") (append! chars "\""))
|
||||||
|
(else (append! chars ch)))
|
||||||
|
(advance! 1))))
|
||||||
|
(loop)))
|
||||||
|
((= (cur) quote-char) (advance! 1))
|
||||||
|
(else
|
||||||
|
(do (append! chars (cur)) (advance! 1) (loop))))))
|
||||||
|
(loop)
|
||||||
|
(join "" chars))))
|
||||||
|
(define
|
||||||
|
scan!
|
||||||
|
(fn
|
||||||
|
()
|
||||||
|
(do
|
||||||
|
(skip-ws!)
|
||||||
|
(when
|
||||||
|
(< pos src-len)
|
||||||
|
(let
|
||||||
|
((ch (cur)) (start pos))
|
||||||
|
(cond
|
||||||
|
((at? ":-")
|
||||||
|
(do
|
||||||
|
(dl-emit! "op" ":-" start)
|
||||||
|
(advance! 2)
|
||||||
|
(scan!)))
|
||||||
|
((at? "?-")
|
||||||
|
(do
|
||||||
|
(dl-emit! "op" "?-" start)
|
||||||
|
(advance! 2)
|
||||||
|
(scan!)))
|
||||||
|
((at? "<=")
|
||||||
|
(do
|
||||||
|
(dl-emit! "op" "<=" start)
|
||||||
|
(advance! 2)
|
||||||
|
(scan!)))
|
||||||
|
((at? ">=")
|
||||||
|
(do
|
||||||
|
(dl-emit! "op" ">=" start)
|
||||||
|
(advance! 2)
|
||||||
|
(scan!)))
|
||||||
|
((at? "!=")
|
||||||
|
(do
|
||||||
|
(dl-emit! "op" "!=" start)
|
||||||
|
(advance! 2)
|
||||||
|
(scan!)))
|
||||||
|
((dl-digit? ch)
|
||||||
|
(do
|
||||||
|
(dl-emit! "number" (read-number start) start)
|
||||||
|
(scan!)))
|
||||||
|
((= ch "'")
|
||||||
|
(do (dl-emit! "atom" (read-quoted "'") start) (scan!)))
|
||||||
|
((= ch "\"")
|
||||||
|
(do (dl-emit! "string" (read-quoted "\"") start) (scan!)))
|
||||||
|
((dl-lower? ch)
|
||||||
|
(do (dl-emit! "atom" (read-ident start) start) (scan!)))
|
||||||
|
((or (dl-upper? ch) (= ch "_"))
|
||||||
|
(do (dl-emit! "var" (read-ident start) start) (scan!)))
|
||||||
|
((= ch "(")
|
||||||
|
(do
|
||||||
|
(dl-emit! "punct" "(" start)
|
||||||
|
(advance! 1)
|
||||||
|
(scan!)))
|
||||||
|
((= ch ")")
|
||||||
|
(do
|
||||||
|
(dl-emit! "punct" ")" start)
|
||||||
|
(advance! 1)
|
||||||
|
(scan!)))
|
||||||
|
((= ch ",")
|
||||||
|
(do
|
||||||
|
(dl-emit! "punct" "," start)
|
||||||
|
(advance! 1)
|
||||||
|
(scan!)))
|
||||||
|
((= ch ".")
|
||||||
|
(do
|
||||||
|
(dl-emit! "punct" "." start)
|
||||||
|
(advance! 1)
|
||||||
|
(scan!)))
|
||||||
|
((= ch "<")
|
||||||
|
(do
|
||||||
|
(dl-emit! "op" "<" start)
|
||||||
|
(advance! 1)
|
||||||
|
(scan!)))
|
||||||
|
((= ch ">")
|
||||||
|
(do
|
||||||
|
(dl-emit! "op" ">" start)
|
||||||
|
(advance! 1)
|
||||||
|
(scan!)))
|
||||||
|
((= ch "=")
|
||||||
|
(do
|
||||||
|
(dl-emit! "op" "=" start)
|
||||||
|
(advance! 1)
|
||||||
|
(scan!)))
|
||||||
|
((= ch "+")
|
||||||
|
(do
|
||||||
|
(dl-emit! "op" "+" start)
|
||||||
|
(advance! 1)
|
||||||
|
(scan!)))
|
||||||
|
((= ch "-")
|
||||||
|
(do
|
||||||
|
(dl-emit! "op" "-" start)
|
||||||
|
(advance! 1)
|
||||||
|
(scan!)))
|
||||||
|
((= ch "*")
|
||||||
|
(do
|
||||||
|
(dl-emit! "op" "*" start)
|
||||||
|
(advance! 1)
|
||||||
|
(scan!)))
|
||||||
|
((= ch "/")
|
||||||
|
(do
|
||||||
|
(dl-emit! "op" "/" start)
|
||||||
|
(advance! 1)
|
||||||
|
(scan!)))
|
||||||
|
(else (do (advance! 1) (scan!)))))))))
|
||||||
|
(scan!)
|
||||||
|
(dl-emit! "eof" nil pos)
|
||||||
|
tokens)))
|
||||||
Reference in New Issue
Block a user