Files
rose-ash/spec/tests/test-parser-combinators.sx
giles 516f9c7186 Step 12: Parser combinator library — pure SX, 68 tests
lib/parser-combinators.sx — 46 definitions in 8 layers:
  0. Result constructors (make-ok, make-fail, ok?, result-value, etc.)
  1. Primitives (satisfy, parse-char, any-char, parse-string, eof)
  2. Core combinators (fmap, parse-bind, seq, alt, label, lazy-parser)
  3. Repetition (many, many1, optional, skip-many)
  4. Structural (between, sep-by, sep-by1, skip-left, skip-right,
     not-followed-by, look-ahead)
  5. Character classes (digit, letter, alpha-num, whitespace, skip-spaces)
  6. Literal parsers (number-literal, string-literal, identifier)
  7. Run function (run-parser)
  8. SX tokenizer (sx-comment, sx-keyword, sx-symbol, sx-number,
     sx-string, sx-token, sx-tokenize)

Self-tests by tokenizing SX: (define x 42), {:ok true}, (+ 1 (* 2 3)),
comments, negative numbers, nested parens, recursive grammars.

No evaluator changes. Pure HO functions + thunks for lazy recursion.
2868/2868 tests, zero failures.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-05 20:14:51 +00:00

424 lines
12 KiB
Plaintext

;; Parser Combinator tests
(defsuite
"pc-primitives"
(deftest
"satisfy matches"
(let
((r (run-parser (satisfy (fn (c) (= c "a")) "a") "abc")))
(assert (ok? r))
(assert= "a" (result-value r))
(assert= 1 (result-pos r))))
(deftest
"satisfy fails"
(let
((r (run-parser (satisfy (fn (c) (= c "x")) "x") "abc")))
(assert (not (ok? r)))
(assert= 0 (result-pos r))))
(deftest
"satisfy fails at eof"
(let
((r (run-parser (satisfy (fn (c) true) "char") "")))
(assert (not (ok? r)))))
(deftest
"parse-char matches"
(let
((r (run-parser (parse-char "b") "bc")))
(assert (ok? r))
(assert= "b" (result-value r))))
(deftest
"parse-char fails"
(let
((r (run-parser (parse-char "x") "bc")))
(assert (not (ok? r)))))
(deftest
"any-char matches"
(let
((r (run-parser any-char "hello")))
(assert (ok? r))
(assert= "h" (result-value r))))
(deftest
"any-char fails on empty"
(assert (not (ok? (run-parser any-char "")))))
(deftest
"parse-string matches"
(let
((r (run-parser (parse-string "hello") "hello world")))
(assert (ok? r))
(assert= "hello" (result-value r))
(assert= 5 (result-pos r))))
(deftest
"parse-string fails on partial"
(assert (not (ok? (run-parser (parse-string "hello") "hel")))))
(deftest
"parse-string fails on mismatch"
(assert (not (ok? (run-parser (parse-string "hello") "world")))))
(deftest "eof succeeds at end" (assert (ok? (run-parser eof ""))))
(deftest
"eof fails with remaining"
(assert (not (ok? (run-parser eof "x"))))))
(defsuite
"pc-core-combinators"
(deftest
"fmap transforms value"
(let
((r (run-parser (fmap upper (parse-char "a")) "abc")))
(assert (ok? r))
(assert= "A" (result-value r))))
(deftest
"fmap propagates failure"
(assert (not (ok? (run-parser (fmap upper (parse-char "x")) "abc")))))
(deftest
"seq two parsers"
(let
((r (run-parser (seq (list (parse-char "a") (parse-char "b"))) "abc")))
(assert (ok? r))
(assert= (list "a" "b") (result-value r))
(assert= 2 (result-pos r))))
(deftest
"seq fails on second"
(assert
(not
(ok?
(run-parser (seq (list (parse-char "a") (parse-char "x"))) "abc")))))
(deftest
"seq empty list"
(let
((r (run-parser (seq (list)) "abc")))
(assert (ok? r))
(assert= (list) (result-value r))
(assert= 0 (result-pos r))))
(deftest
"alt first succeeds"
(let
((r (run-parser (alt (list (parse-char "a") (parse-char "b"))) "abc")))
(assert (ok? r))
(assert= "a" (result-value r))))
(deftest
"alt second succeeds"
(let
((r (run-parser (alt (list (parse-char "x") (parse-char "a"))) "abc")))
(assert (ok? r))
(assert= "a" (result-value r))))
(deftest
"alt all fail"
(assert
(not
(ok?
(run-parser (alt (list (parse-char "x") (parse-char "y"))) "abc")))))
(deftest
"label overrides error"
(let
((r (run-parser (label "my-thing" (parse-char "x")) "abc")))
(assert (not (ok? r)))
(assert= "my-thing" (result-expected r))))
(deftest
"parse-bind chains"
(let
((p (parse-bind (parse-char "a") (fn (ch) (parse-char "b")))))
(let
((r (run-parser p "abc")))
(assert (ok? r))
(assert= "b" (result-value r))
(assert= 2 (result-pos r)))))
(deftest
"lazy-parser defers"
(let
((p (lazy-parser (fn () (parse-char "a")))))
(assert (ok? (run-parser p "abc"))))))
(defsuite
"pc-repetition"
(deftest
"many zero matches"
(let
((r (run-parser (many (parse-char "x")) "abc")))
(assert (ok? r))
(assert= (list) (result-value r))
(assert= 0 (result-pos r))))
(deftest
"many multiple matches"
(let
((r (run-parser (many (parse-char "a")) "aaab")))
(assert (ok? r))
(assert= (list "a" "a" "a") (result-value r))
(assert= 3 (result-pos r))))
(deftest
"many1 one match"
(let
((r (run-parser (many1 (parse-char "a")) "ab")))
(assert (ok? r))
(assert= (list "a") (result-value r))))
(deftest
"many1 fails on zero"
(assert (not (ok? (run-parser (many1 (parse-char "a")) "bc")))))
(deftest
"optional present"
(let
((r (run-parser (optional (parse-char "a")) "abc")))
(assert (ok? r))
(assert= "a" (result-value r))))
(deftest
"optional absent"
(let
((r (run-parser (optional (parse-char "x")) "abc")))
(assert (ok? r))
(assert= nil (result-value r))
(assert= 0 (result-pos r))))
(deftest
"skip-many consumes"
(let
((r (run-parser (skip-many (parse-char " ")) " abc")))
(assert (ok? r))
(assert= 3 (result-pos r)))))
(defsuite
"pc-structural"
(deftest
"between parens"
(let
((r (run-parser (between (parse-char "(") (parse-char ")") (parse-char "x")) "(x)")))
(assert (ok? r))
(assert= "x" (result-value r))))
(deftest
"sep-by with items"
(let
((r (run-parser (sep-by (parse-char "a") (parse-char ",")) "a,a,a")))
(assert (ok? r))
(assert= (list "a" "a" "a") (result-value r))))
(deftest
"sep-by empty"
(let
((r (run-parser (sep-by (parse-char "a") (parse-char ",")) "xyz")))
(assert (ok? r))
(assert= (list) (result-value r))))
(deftest
"sep-by1 with items"
(let
((r (run-parser (sep-by1 digit (parse-char ",")) "1,2,3")))
(assert (ok? r))
(assert= (list "1" "2" "3") (result-value r))))
(deftest
"sep-by1 fails on empty"
(assert
(not (ok? (run-parser (sep-by1 digit (parse-char ",")) "xyz")))))
(deftest
"skip-left discards first"
(let
((r (run-parser (skip-left (parse-char " ") (parse-char "x")) " x")))
(assert (ok? r))
(assert= "x" (result-value r))))
(deftest
"skip-right discards second"
(let
((r (run-parser (skip-right (parse-char "x") (parse-char " ")) "x ")))
(assert (ok? r))
(assert= "x" (result-value r))))
(deftest
"not-followed-by succeeds on failure"
(let
((r (run-parser (not-followed-by (parse-char "x")) "abc")))
(assert (ok? r))
(assert= 0 (result-pos r))))
(deftest
"not-followed-by fails on success"
(assert
(not (ok? (run-parser (not-followed-by (parse-char "a")) "abc")))))
(deftest
"look-ahead preserves position"
(let
((r (run-parser (look-ahead (parse-char "a")) "abc")))
(assert (ok? r))
(assert= "a" (result-value r))
(assert= 0 (result-pos r)))))
(defsuite
"pc-char-classes"
(deftest "digit matches" (assert (ok? (run-parser digit "5"))))
(deftest
"digit fails on letter"
(assert (not (ok? (run-parser digit "a")))))
(deftest "letter matches" (assert (ok? (run-parser letter "a"))))
(deftest
"letter fails on digit"
(assert (not (ok? (run-parser letter "5")))))
(deftest
"alpha-num matches digit"
(assert (ok? (run-parser alpha-num "5"))))
(deftest
"alpha-num matches letter"
(assert (ok? (run-parser alpha-num "z"))))
(deftest
"skip-spaces skips"
(let
((r (run-parser skip-spaces " \t\nabc")))
(assert (ok? r))
(assert= 4 (result-pos r))))
(deftest
"skip-spaces on no spaces"
(let
((r (run-parser skip-spaces "abc")))
(assert (ok? r))
(assert= 0 (result-pos r)))))
(defsuite
"pc-literals"
(deftest
"number integer"
(let
((r (run-parser number-literal "42abc")))
(assert (ok? r))
(assert= 42 (result-value r))
(assert= 2 (result-pos r))))
(deftest
"number decimal"
(let
((r (run-parser number-literal "3.14rest")))
(assert (ok? r))
(assert= 3.14 (result-value r))))
(deftest
"number negative"
(let
((r (run-parser number-literal "-7x")))
(assert (ok? r))
(assert= -7 (result-value r))))
(deftest
"number fails on non-digit"
(assert (not (ok? (run-parser number-literal "abc")))))
(deftest
"string-literal basic"
(let
((r (run-parser string-literal "\"hello\" rest")))
(assert (ok? r))
(assert= "hello" (result-value r))))
(deftest
"string-literal with escapes"
(let
((r (run-parser string-literal "\"a\\nb\"")))
(assert (ok? r))
(assert= "a\nb" (result-value r))))
(deftest
"string-literal fails without quote"
(assert (not (ok? (run-parser string-literal "hello")))))
(deftest
"identifier basic"
(let
((r (run-parser identifier "hello-world rest")))
(assert (ok? r))
(assert= "hello-world" (result-value r))))
(deftest
"identifier with special chars"
(let
((r (run-parser identifier "empty? x")))
(assert (ok? r))
(assert= "empty?" (result-value r)))))
(defsuite
"pc-sx-tokenizer"
(deftest
"tokenize number"
(let
((tokens (result-value (run-parser sx-tokenize "42"))))
(assert= 1 (len tokens))
(assert= "number" (get (first tokens) "type"))
(assert= 42 (get (first tokens) "value"))))
(deftest
"tokenize string"
(let
((tokens (result-value (run-parser sx-tokenize "\"hi\""))))
(assert= 1 (len tokens))
(assert= "string" (get (first tokens) "type"))
(assert= "hi" (get (first tokens) "value"))))
(deftest
"tokenize keyword"
(let
((tokens (result-value (run-parser sx-tokenize ":ok"))))
(assert= 1 (len tokens))
(assert= "keyword" (get (first tokens) "type"))
(assert= ":ok" (get (first tokens) "value"))))
(deftest
"tokenize symbol"
(let
((tokens (result-value (run-parser sx-tokenize "define"))))
(assert= 1 (len tokens))
(assert= "symbol" (get (first tokens) "type"))
(assert= "define" (get (first tokens) "value"))))
(deftest
"tokenize simple expression"
(let
((tokens (result-value (run-parser sx-tokenize "(+ 1 2)"))))
(assert= 5 (len tokens))
(assert= "open-paren" (get (first tokens) "type"))
(assert= "symbol" (get (nth tokens 1) "type"))
(assert= "+" (get (nth tokens 1) "value"))
(assert= "number" (get (nth tokens 2) "type"))
(assert= "close-paren" (get (nth tokens 4) "type"))))
(deftest
"tokenize define expression"
(let
((tokens (result-value (run-parser sx-tokenize "(define x 42)"))))
(assert= 5 (len tokens))
(assert= "symbol" (get (nth tokens 1) "type"))
(assert= "define" (get (nth tokens 1) "value"))
(assert= "number" (get (nth tokens 3) "type"))
(assert= 42 (get (nth tokens 3) "value"))))
(deftest
"tokenize dict literal"
(let
((tokens (result-value (run-parser sx-tokenize "{:ok true :value 42}"))))
(assert= 6 (len tokens))
(assert= "open-brace" (get (first tokens) "type"))
(assert= "keyword" (get (nth tokens 1) "type"))
(assert= "close-brace" (get (nth tokens 5) "type"))))
(deftest
"tokenize with comment"
(let
((tokens (result-value (run-parser sx-tokenize ";; comment\n42"))))
(assert= 1 (len tokens))
(assert= "number" (get (first tokens) "type"))))
(deftest
"tokenize nested expression"
(let
((tokens (result-value (run-parser sx-tokenize "(+ 1 (* 2 3))"))))
(assert= 9 (len tokens))
(assert= "open-paren" (get (first tokens) "type"))
(assert= "open-paren" (get (nth tokens 3) "type"))
(assert= "close-paren" (get (nth tokens 7) "type"))
(assert= "close-paren" (get (nth tokens 8) "type")))))
(defsuite
"pc-recursive"
(deftest
"lazy-parser enables recursion"
(define
nested-a
(lazy-parser
(fn
()
(alt
(list
(parse-char "a")
(fmap
(fn (parts) (nth parts 1))
(seq (list (parse-char "(") nested-a (parse-char ")")))))))))
(assert (ok? (run-parser nested-a "a")))
(assert= "a" (result-value (run-parser nested-a "a")))
(assert= "a" (result-value (run-parser nested-a "(a)")))
(assert= "a" (result-value (run-parser nested-a "((a))"))))
(deftest
"matched parens depth"
(define
paren-depth
(lazy-parser
(fn
()
(alt
(list
(fmap
(fn (parts) (+ 1 (nth parts 1)))
(seq (list (parse-char "(") paren-depth (parse-char ")"))))
(fn (input pos) (make-ok 0 pos)))))))
(assert= 0 (result-value (run-parser paren-depth "")))
(assert= 1 (result-value (run-parser paren-depth "()")))
(assert= 3 (result-value (run-parser paren-depth "((()))")))))