lib/parser-combinators.sx — 46 definitions in 8 layers:
0. Result constructors (make-ok, make-fail, ok?, result-value, etc.)
1. Primitives (satisfy, parse-char, any-char, parse-string, eof)
2. Core combinators (fmap, parse-bind, seq, alt, label, lazy-parser)
3. Repetition (many, many1, optional, skip-many)
4. Structural (between, sep-by, sep-by1, skip-left, skip-right,
not-followed-by, look-ahead)
5. Character classes (digit, letter, alpha-num, whitespace, skip-spaces)
6. Literal parsers (number-literal, string-literal, identifier)
7. Run function (run-parser)
8. SX tokenizer (sx-comment, sx-keyword, sx-symbol, sx-number,
sx-string, sx-token, sx-tokenize)
Self-tests by tokenizing SX: (define x 42), {:ok true}, (+ 1 (* 2 3)),
comments, negative numbers, nested parens, recursive grammars.
No evaluator changes. Pure HO functions + thunks for lazy recursion.
2868/2868 tests, zero failures.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
424 lines
12 KiB
Plaintext
424 lines
12 KiB
Plaintext
;; Parser Combinator tests
|
|
|
|
(defsuite
|
|
"pc-primitives"
|
|
(deftest
|
|
"satisfy matches"
|
|
(let
|
|
((r (run-parser (satisfy (fn (c) (= c "a")) "a") "abc")))
|
|
(assert (ok? r))
|
|
(assert= "a" (result-value r))
|
|
(assert= 1 (result-pos r))))
|
|
(deftest
|
|
"satisfy fails"
|
|
(let
|
|
((r (run-parser (satisfy (fn (c) (= c "x")) "x") "abc")))
|
|
(assert (not (ok? r)))
|
|
(assert= 0 (result-pos r))))
|
|
(deftest
|
|
"satisfy fails at eof"
|
|
(let
|
|
((r (run-parser (satisfy (fn (c) true) "char") "")))
|
|
(assert (not (ok? r)))))
|
|
(deftest
|
|
"parse-char matches"
|
|
(let
|
|
((r (run-parser (parse-char "b") "bc")))
|
|
(assert (ok? r))
|
|
(assert= "b" (result-value r))))
|
|
(deftest
|
|
"parse-char fails"
|
|
(let
|
|
((r (run-parser (parse-char "x") "bc")))
|
|
(assert (not (ok? r)))))
|
|
(deftest
|
|
"any-char matches"
|
|
(let
|
|
((r (run-parser any-char "hello")))
|
|
(assert (ok? r))
|
|
(assert= "h" (result-value r))))
|
|
(deftest
|
|
"any-char fails on empty"
|
|
(assert (not (ok? (run-parser any-char "")))))
|
|
(deftest
|
|
"parse-string matches"
|
|
(let
|
|
((r (run-parser (parse-string "hello") "hello world")))
|
|
(assert (ok? r))
|
|
(assert= "hello" (result-value r))
|
|
(assert= 5 (result-pos r))))
|
|
(deftest
|
|
"parse-string fails on partial"
|
|
(assert (not (ok? (run-parser (parse-string "hello") "hel")))))
|
|
(deftest
|
|
"parse-string fails on mismatch"
|
|
(assert (not (ok? (run-parser (parse-string "hello") "world")))))
|
|
(deftest "eof succeeds at end" (assert (ok? (run-parser eof ""))))
|
|
(deftest
|
|
"eof fails with remaining"
|
|
(assert (not (ok? (run-parser eof "x"))))))
|
|
|
|
(defsuite
|
|
"pc-core-combinators"
|
|
(deftest
|
|
"fmap transforms value"
|
|
(let
|
|
((r (run-parser (fmap upper (parse-char "a")) "abc")))
|
|
(assert (ok? r))
|
|
(assert= "A" (result-value r))))
|
|
(deftest
|
|
"fmap propagates failure"
|
|
(assert (not (ok? (run-parser (fmap upper (parse-char "x")) "abc")))))
|
|
(deftest
|
|
"seq two parsers"
|
|
(let
|
|
((r (run-parser (seq (list (parse-char "a") (parse-char "b"))) "abc")))
|
|
(assert (ok? r))
|
|
(assert= (list "a" "b") (result-value r))
|
|
(assert= 2 (result-pos r))))
|
|
(deftest
|
|
"seq fails on second"
|
|
(assert
|
|
(not
|
|
(ok?
|
|
(run-parser (seq (list (parse-char "a") (parse-char "x"))) "abc")))))
|
|
(deftest
|
|
"seq empty list"
|
|
(let
|
|
((r (run-parser (seq (list)) "abc")))
|
|
(assert (ok? r))
|
|
(assert= (list) (result-value r))
|
|
(assert= 0 (result-pos r))))
|
|
(deftest
|
|
"alt first succeeds"
|
|
(let
|
|
((r (run-parser (alt (list (parse-char "a") (parse-char "b"))) "abc")))
|
|
(assert (ok? r))
|
|
(assert= "a" (result-value r))))
|
|
(deftest
|
|
"alt second succeeds"
|
|
(let
|
|
((r (run-parser (alt (list (parse-char "x") (parse-char "a"))) "abc")))
|
|
(assert (ok? r))
|
|
(assert= "a" (result-value r))))
|
|
(deftest
|
|
"alt all fail"
|
|
(assert
|
|
(not
|
|
(ok?
|
|
(run-parser (alt (list (parse-char "x") (parse-char "y"))) "abc")))))
|
|
(deftest
|
|
"label overrides error"
|
|
(let
|
|
((r (run-parser (label "my-thing" (parse-char "x")) "abc")))
|
|
(assert (not (ok? r)))
|
|
(assert= "my-thing" (result-expected r))))
|
|
(deftest
|
|
"parse-bind chains"
|
|
(let
|
|
((p (parse-bind (parse-char "a") (fn (ch) (parse-char "b")))))
|
|
(let
|
|
((r (run-parser p "abc")))
|
|
(assert (ok? r))
|
|
(assert= "b" (result-value r))
|
|
(assert= 2 (result-pos r)))))
|
|
(deftest
|
|
"lazy-parser defers"
|
|
(let
|
|
((p (lazy-parser (fn () (parse-char "a")))))
|
|
(assert (ok? (run-parser p "abc"))))))
|
|
|
|
(defsuite
|
|
"pc-repetition"
|
|
(deftest
|
|
"many zero matches"
|
|
(let
|
|
((r (run-parser (many (parse-char "x")) "abc")))
|
|
(assert (ok? r))
|
|
(assert= (list) (result-value r))
|
|
(assert= 0 (result-pos r))))
|
|
(deftest
|
|
"many multiple matches"
|
|
(let
|
|
((r (run-parser (many (parse-char "a")) "aaab")))
|
|
(assert (ok? r))
|
|
(assert= (list "a" "a" "a") (result-value r))
|
|
(assert= 3 (result-pos r))))
|
|
(deftest
|
|
"many1 one match"
|
|
(let
|
|
((r (run-parser (many1 (parse-char "a")) "ab")))
|
|
(assert (ok? r))
|
|
(assert= (list "a") (result-value r))))
|
|
(deftest
|
|
"many1 fails on zero"
|
|
(assert (not (ok? (run-parser (many1 (parse-char "a")) "bc")))))
|
|
(deftest
|
|
"optional present"
|
|
(let
|
|
((r (run-parser (optional (parse-char "a")) "abc")))
|
|
(assert (ok? r))
|
|
(assert= "a" (result-value r))))
|
|
(deftest
|
|
"optional absent"
|
|
(let
|
|
((r (run-parser (optional (parse-char "x")) "abc")))
|
|
(assert (ok? r))
|
|
(assert= nil (result-value r))
|
|
(assert= 0 (result-pos r))))
|
|
(deftest
|
|
"skip-many consumes"
|
|
(let
|
|
((r (run-parser (skip-many (parse-char " ")) " abc")))
|
|
(assert (ok? r))
|
|
(assert= 3 (result-pos r)))))
|
|
|
|
(defsuite
|
|
"pc-structural"
|
|
(deftest
|
|
"between parens"
|
|
(let
|
|
((r (run-parser (between (parse-char "(") (parse-char ")") (parse-char "x")) "(x)")))
|
|
(assert (ok? r))
|
|
(assert= "x" (result-value r))))
|
|
(deftest
|
|
"sep-by with items"
|
|
(let
|
|
((r (run-parser (sep-by (parse-char "a") (parse-char ",")) "a,a,a")))
|
|
(assert (ok? r))
|
|
(assert= (list "a" "a" "a") (result-value r))))
|
|
(deftest
|
|
"sep-by empty"
|
|
(let
|
|
((r (run-parser (sep-by (parse-char "a") (parse-char ",")) "xyz")))
|
|
(assert (ok? r))
|
|
(assert= (list) (result-value r))))
|
|
(deftest
|
|
"sep-by1 with items"
|
|
(let
|
|
((r (run-parser (sep-by1 digit (parse-char ",")) "1,2,3")))
|
|
(assert (ok? r))
|
|
(assert= (list "1" "2" "3") (result-value r))))
|
|
(deftest
|
|
"sep-by1 fails on empty"
|
|
(assert
|
|
(not (ok? (run-parser (sep-by1 digit (parse-char ",")) "xyz")))))
|
|
(deftest
|
|
"skip-left discards first"
|
|
(let
|
|
((r (run-parser (skip-left (parse-char " ") (parse-char "x")) " x")))
|
|
(assert (ok? r))
|
|
(assert= "x" (result-value r))))
|
|
(deftest
|
|
"skip-right discards second"
|
|
(let
|
|
((r (run-parser (skip-right (parse-char "x") (parse-char " ")) "x ")))
|
|
(assert (ok? r))
|
|
(assert= "x" (result-value r))))
|
|
(deftest
|
|
"not-followed-by succeeds on failure"
|
|
(let
|
|
((r (run-parser (not-followed-by (parse-char "x")) "abc")))
|
|
(assert (ok? r))
|
|
(assert= 0 (result-pos r))))
|
|
(deftest
|
|
"not-followed-by fails on success"
|
|
(assert
|
|
(not (ok? (run-parser (not-followed-by (parse-char "a")) "abc")))))
|
|
(deftest
|
|
"look-ahead preserves position"
|
|
(let
|
|
((r (run-parser (look-ahead (parse-char "a")) "abc")))
|
|
(assert (ok? r))
|
|
(assert= "a" (result-value r))
|
|
(assert= 0 (result-pos r)))))
|
|
|
|
(defsuite
|
|
"pc-char-classes"
|
|
(deftest "digit matches" (assert (ok? (run-parser digit "5"))))
|
|
(deftest
|
|
"digit fails on letter"
|
|
(assert (not (ok? (run-parser digit "a")))))
|
|
(deftest "letter matches" (assert (ok? (run-parser letter "a"))))
|
|
(deftest
|
|
"letter fails on digit"
|
|
(assert (not (ok? (run-parser letter "5")))))
|
|
(deftest
|
|
"alpha-num matches digit"
|
|
(assert (ok? (run-parser alpha-num "5"))))
|
|
(deftest
|
|
"alpha-num matches letter"
|
|
(assert (ok? (run-parser alpha-num "z"))))
|
|
(deftest
|
|
"skip-spaces skips"
|
|
(let
|
|
((r (run-parser skip-spaces " \t\nabc")))
|
|
(assert (ok? r))
|
|
(assert= 4 (result-pos r))))
|
|
(deftest
|
|
"skip-spaces on no spaces"
|
|
(let
|
|
((r (run-parser skip-spaces "abc")))
|
|
(assert (ok? r))
|
|
(assert= 0 (result-pos r)))))
|
|
|
|
(defsuite
|
|
"pc-literals"
|
|
(deftest
|
|
"number integer"
|
|
(let
|
|
((r (run-parser number-literal "42abc")))
|
|
(assert (ok? r))
|
|
(assert= 42 (result-value r))
|
|
(assert= 2 (result-pos r))))
|
|
(deftest
|
|
"number decimal"
|
|
(let
|
|
((r (run-parser number-literal "3.14rest")))
|
|
(assert (ok? r))
|
|
(assert= 3.14 (result-value r))))
|
|
(deftest
|
|
"number negative"
|
|
(let
|
|
((r (run-parser number-literal "-7x")))
|
|
(assert (ok? r))
|
|
(assert= -7 (result-value r))))
|
|
(deftest
|
|
"number fails on non-digit"
|
|
(assert (not (ok? (run-parser number-literal "abc")))))
|
|
(deftest
|
|
"string-literal basic"
|
|
(let
|
|
((r (run-parser string-literal "\"hello\" rest")))
|
|
(assert (ok? r))
|
|
(assert= "hello" (result-value r))))
|
|
(deftest
|
|
"string-literal with escapes"
|
|
(let
|
|
((r (run-parser string-literal "\"a\\nb\"")))
|
|
(assert (ok? r))
|
|
(assert= "a\nb" (result-value r))))
|
|
(deftest
|
|
"string-literal fails without quote"
|
|
(assert (not (ok? (run-parser string-literal "hello")))))
|
|
(deftest
|
|
"identifier basic"
|
|
(let
|
|
((r (run-parser identifier "hello-world rest")))
|
|
(assert (ok? r))
|
|
(assert= "hello-world" (result-value r))))
|
|
(deftest
|
|
"identifier with special chars"
|
|
(let
|
|
((r (run-parser identifier "empty? x")))
|
|
(assert (ok? r))
|
|
(assert= "empty?" (result-value r)))))
|
|
|
|
(defsuite
|
|
"pc-sx-tokenizer"
|
|
(deftest
|
|
"tokenize number"
|
|
(let
|
|
((tokens (result-value (run-parser sx-tokenize "42"))))
|
|
(assert= 1 (len tokens))
|
|
(assert= "number" (get (first tokens) "type"))
|
|
(assert= 42 (get (first tokens) "value"))))
|
|
(deftest
|
|
"tokenize string"
|
|
(let
|
|
((tokens (result-value (run-parser sx-tokenize "\"hi\""))))
|
|
(assert= 1 (len tokens))
|
|
(assert= "string" (get (first tokens) "type"))
|
|
(assert= "hi" (get (first tokens) "value"))))
|
|
(deftest
|
|
"tokenize keyword"
|
|
(let
|
|
((tokens (result-value (run-parser sx-tokenize ":ok"))))
|
|
(assert= 1 (len tokens))
|
|
(assert= "keyword" (get (first tokens) "type"))
|
|
(assert= ":ok" (get (first tokens) "value"))))
|
|
(deftest
|
|
"tokenize symbol"
|
|
(let
|
|
((tokens (result-value (run-parser sx-tokenize "define"))))
|
|
(assert= 1 (len tokens))
|
|
(assert= "symbol" (get (first tokens) "type"))
|
|
(assert= "define" (get (first tokens) "value"))))
|
|
(deftest
|
|
"tokenize simple expression"
|
|
(let
|
|
((tokens (result-value (run-parser sx-tokenize "(+ 1 2)"))))
|
|
(assert= 5 (len tokens))
|
|
(assert= "open-paren" (get (first tokens) "type"))
|
|
(assert= "symbol" (get (nth tokens 1) "type"))
|
|
(assert= "+" (get (nth tokens 1) "value"))
|
|
(assert= "number" (get (nth tokens 2) "type"))
|
|
(assert= "close-paren" (get (nth tokens 4) "type"))))
|
|
(deftest
|
|
"tokenize define expression"
|
|
(let
|
|
((tokens (result-value (run-parser sx-tokenize "(define x 42)"))))
|
|
(assert= 5 (len tokens))
|
|
(assert= "symbol" (get (nth tokens 1) "type"))
|
|
(assert= "define" (get (nth tokens 1) "value"))
|
|
(assert= "number" (get (nth tokens 3) "type"))
|
|
(assert= 42 (get (nth tokens 3) "value"))))
|
|
(deftest
|
|
"tokenize dict literal"
|
|
(let
|
|
((tokens (result-value (run-parser sx-tokenize "{:ok true :value 42}"))))
|
|
(assert= 6 (len tokens))
|
|
(assert= "open-brace" (get (first tokens) "type"))
|
|
(assert= "keyword" (get (nth tokens 1) "type"))
|
|
(assert= "close-brace" (get (nth tokens 5) "type"))))
|
|
(deftest
|
|
"tokenize with comment"
|
|
(let
|
|
((tokens (result-value (run-parser sx-tokenize ";; comment\n42"))))
|
|
(assert= 1 (len tokens))
|
|
(assert= "number" (get (first tokens) "type"))))
|
|
(deftest
|
|
"tokenize nested expression"
|
|
(let
|
|
((tokens (result-value (run-parser sx-tokenize "(+ 1 (* 2 3))"))))
|
|
(assert= 9 (len tokens))
|
|
(assert= "open-paren" (get (first tokens) "type"))
|
|
(assert= "open-paren" (get (nth tokens 3) "type"))
|
|
(assert= "close-paren" (get (nth tokens 7) "type"))
|
|
(assert= "close-paren" (get (nth tokens 8) "type")))))
|
|
|
|
(defsuite
|
|
"pc-recursive"
|
|
(deftest
|
|
"lazy-parser enables recursion"
|
|
(define
|
|
nested-a
|
|
(lazy-parser
|
|
(fn
|
|
()
|
|
(alt
|
|
(list
|
|
(parse-char "a")
|
|
(fmap
|
|
(fn (parts) (nth parts 1))
|
|
(seq (list (parse-char "(") nested-a (parse-char ")")))))))))
|
|
(assert (ok? (run-parser nested-a "a")))
|
|
(assert= "a" (result-value (run-parser nested-a "a")))
|
|
(assert= "a" (result-value (run-parser nested-a "(a)")))
|
|
(assert= "a" (result-value (run-parser nested-a "((a))"))))
|
|
(deftest
|
|
"matched parens depth"
|
|
(define
|
|
paren-depth
|
|
(lazy-parser
|
|
(fn
|
|
()
|
|
(alt
|
|
(list
|
|
(fmap
|
|
(fn (parts) (+ 1 (nth parts 1)))
|
|
(seq (list (parse-char "(") paren-depth (parse-char ")"))))
|
|
(fn (input pos) (make-ok 0 pos)))))))
|
|
(assert= 0 (result-value (run-parser paren-depth "")))
|
|
(assert= 1 (result-value (run-parser paren-depth "()")))
|
|
(assert= 3 (result-value (run-parser paren-depth "((()))"))))) |