Step 12: Parser combinator library — pure SX, 68 tests
lib/parser-combinators.sx — 46 definitions in 8 layers:
0. Result constructors (make-ok, make-fail, ok?, result-value, etc.)
1. Primitives (satisfy, parse-char, any-char, parse-string, eof)
2. Core combinators (fmap, parse-bind, seq, alt, label, lazy-parser)
3. Repetition (many, many1, optional, skip-many)
4. Structural (between, sep-by, sep-by1, skip-left, skip-right,
not-followed-by, look-ahead)
5. Character classes (digit, letter, alpha-num, whitespace, skip-spaces)
6. Literal parsers (number-literal, string-literal, identifier)
7. Run function (run-parser)
8. SX tokenizer (sx-comment, sx-keyword, sx-symbol, sx-number,
sx-string, sx-token, sx-tokenize)
Self-tests by tokenizing SX: (define x 42), {:ok true}, (+ 1 (* 2 3)),
comments, negative numbers, nested parens, recursive grammars.
No evaluator changes. Pure HO functions + thunks for lazy recursion.
2868/2868 tests, zero failures.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1352,6 +1352,7 @@ let run_spec_tests env test_files =
|
||||
load_module "signals.sx" web_dir; (* web extensions *)
|
||||
load_module "freeze.sx" lib_dir;
|
||||
load_module "content.sx" lib_dir;
|
||||
load_module "parser-combinators.sx" lib_dir;
|
||||
load_module "types.sx" lib_dir;
|
||||
load_module "sx-swap.sx" lib_dir;
|
||||
(* Shared templates: TW styling engine *)
|
||||
|
||||
485
lib/parser-combinators.sx
Normal file
485
lib/parser-combinators.sx
Normal file
@@ -0,0 +1,485 @@
|
||||
;; Parser Combinator Library — Pure SX
|
||||
;;
|
||||
;; A parser is (fn (input pos) result) where:
|
||||
;; Success: {:ok true :value val :pos new-pos}
|
||||
;; Failure: {:ok false :expected desc :pos pos}
|
||||
;;
|
||||
;; Combinators compose parsers: seq, alt, many, sep-by, between, etc.
|
||||
;; Recursive grammars use (lazy-parser (fn () ...)) thunks.
|
||||
|
||||
;; ── Layer 0: Result constructors ──────────────────────────────────
|
||||
|
||||
(define make-ok (fn (val pos) {:pos pos :ok true :value val}))
|
||||
(define make-fail (fn (expected pos) {:pos pos :ok false :expected expected}))
|
||||
(define ok? (fn (r) (get r "ok")))
|
||||
(define result-value (fn (r) (get r "value")))
|
||||
(define result-pos (fn (r) (get r "pos")))
|
||||
(define result-expected (fn (r) (get r "expected")))
|
||||
|
||||
;; ── Layer 1: Primitive parsers ────────────────────────────────────
|
||||
|
||||
(define
|
||||
satisfy
|
||||
(fn
|
||||
(pred desc)
|
||||
(fn
|
||||
(input pos)
|
||||
(if
|
||||
(< pos (len input))
|
||||
(let
|
||||
((ch (nth input pos)))
|
||||
(if (pred ch) (make-ok ch (+ pos 1)) (make-fail desc pos)))
|
||||
(make-fail desc pos)))))
|
||||
|
||||
(define
|
||||
parse-char
|
||||
(fn (ch) (satisfy (fn (c) (= c ch)) (str "'" ch "'"))))
|
||||
|
||||
(define any-char (satisfy (fn (c) true) "any character"))
|
||||
|
||||
(define
|
||||
parse-string
|
||||
(fn
|
||||
(target)
|
||||
(let
|
||||
((target-len (len target)))
|
||||
(fn
|
||||
(input pos)
|
||||
(if
|
||||
(<= (+ pos target-len) (len input))
|
||||
(if
|
||||
(= (slice input pos (+ pos target-len)) target)
|
||||
(make-ok target (+ pos target-len))
|
||||
(make-fail (str "\"" target "\"") pos))
|
||||
(make-fail (str "\"" target "\"") pos))))))
|
||||
|
||||
(define
|
||||
eof
|
||||
(fn
|
||||
(input pos)
|
||||
(if
|
||||
(>= pos (len input))
|
||||
(make-ok nil pos)
|
||||
(make-fail "end of input" pos))))
|
||||
|
||||
;; ── Layer 2: Core combinators ─────────────────────────────────────
|
||||
|
||||
(define
|
||||
fmap
|
||||
(fn
|
||||
(f parser)
|
||||
(fn
|
||||
(input pos)
|
||||
(let
|
||||
((r (parser input pos)))
|
||||
(if (ok? r) (make-ok (f (result-value r)) (result-pos r)) r)))))
|
||||
|
||||
(define
|
||||
parse-bind
|
||||
(fn
|
||||
(parser f)
|
||||
(fn
|
||||
(input pos)
|
||||
(let
|
||||
((r (parser input pos)))
|
||||
(if (ok? r) ((f (result-value r)) input (result-pos r)) r)))))
|
||||
|
||||
(define
|
||||
seq
|
||||
(fn
|
||||
(parsers)
|
||||
(fn
|
||||
(input pos)
|
||||
(let
|
||||
((results (list)) (cur-pos pos) (failed nil))
|
||||
(for-each
|
||||
(fn
|
||||
(p)
|
||||
(when
|
||||
(not failed)
|
||||
(let
|
||||
((r (p input cur-pos)))
|
||||
(if
|
||||
(ok? r)
|
||||
(do
|
||||
(append! results (result-value r))
|
||||
(set! cur-pos (result-pos r)))
|
||||
(set! failed r)))))
|
||||
parsers)
|
||||
(if failed failed (make-ok results cur-pos))))))
|
||||
|
||||
(define seq2 (fn (p1 p2) (seq (list p1 p2))))
|
||||
|
||||
(define
|
||||
alt
|
||||
(fn
|
||||
(parsers)
|
||||
(fn
|
||||
(input pos)
|
||||
(let
|
||||
((best-fail nil))
|
||||
(define
|
||||
loop
|
||||
(fn
|
||||
(ps)
|
||||
(if
|
||||
(empty? ps)
|
||||
(or best-fail (make-fail "no alternatives" pos))
|
||||
(let
|
||||
((r ((first ps) input pos)))
|
||||
(if
|
||||
(ok? r)
|
||||
r
|
||||
(do
|
||||
(when
|
||||
(or
|
||||
(not best-fail)
|
||||
(> (result-pos r) (result-pos best-fail)))
|
||||
(set! best-fail r))
|
||||
(loop (rest ps))))))))
|
||||
(loop parsers)))))
|
||||
|
||||
(define alt2 (fn (p1 p2) (alt (list p1 p2))))
|
||||
|
||||
(define
|
||||
label
|
||||
(fn
|
||||
(name parser)
|
||||
(fn
|
||||
(input pos)
|
||||
(let ((r (parser input pos))) (if (ok? r) r (make-fail name pos))))))
|
||||
|
||||
(define lazy-parser (fn (thunk) (fn (input pos) ((thunk) input pos))))
|
||||
|
||||
;; ── Layer 3: Repetition combinators ───────────────────────────────
|
||||
|
||||
(define
|
||||
many
|
||||
(fn
|
||||
(parser)
|
||||
(fn
|
||||
(input pos)
|
||||
(let
|
||||
((results (list)) (cur-pos pos))
|
||||
(define
|
||||
loop
|
||||
(fn
|
||||
()
|
||||
(let
|
||||
((r (parser input cur-pos)))
|
||||
(if
|
||||
(ok? r)
|
||||
(do
|
||||
(append! results (result-value r))
|
||||
(set! cur-pos (result-pos r))
|
||||
(loop))
|
||||
(make-ok results cur-pos)))))
|
||||
(loop)))))
|
||||
|
||||
(define
|
||||
many1
|
||||
(fn
|
||||
(parser)
|
||||
(fn
|
||||
(input pos)
|
||||
(let
|
||||
((r (parser input pos)))
|
||||
(if
|
||||
(ok? r)
|
||||
(let
|
||||
((rest-r ((many parser) input (result-pos r))))
|
||||
(make-ok
|
||||
(cons (result-value r) (result-value rest-r))
|
||||
(result-pos rest-r)))
|
||||
r)))))
|
||||
|
||||
(define
|
||||
optional
|
||||
(fn
|
||||
(parser)
|
||||
(fn
|
||||
(input pos)
|
||||
(let ((r (parser input pos))) (if (ok? r) r (make-ok nil pos))))))
|
||||
|
||||
(define
|
||||
skip-many
|
||||
(fn
|
||||
(parser)
|
||||
(fn
|
||||
(input pos)
|
||||
(let
|
||||
((cur-pos pos))
|
||||
(define
|
||||
loop
|
||||
(fn
|
||||
()
|
||||
(let
|
||||
((r (parser input cur-pos)))
|
||||
(if
|
||||
(ok? r)
|
||||
(do (set! cur-pos (result-pos r)) (loop))
|
||||
(make-ok nil cur-pos)))))
|
||||
(loop)))))
|
||||
|
||||
;; ── Layer 4: Structural combinators ───────────────────────────────
|
||||
|
||||
(define
|
||||
between
|
||||
(fn
|
||||
(open close body)
|
||||
(fmap (fn (results) (nth results 1)) (seq (list open body close)))))
|
||||
|
||||
(define
|
||||
sep-by1
|
||||
(fn
|
||||
(parser sep)
|
||||
(fn
|
||||
(input pos)
|
||||
(let
|
||||
((first-r (parser input pos)))
|
||||
(if
|
||||
(ok? first-r)
|
||||
(let
|
||||
((rest-r ((many (fmap (fn (pair) (nth pair 1)) (seq (list sep parser)))) input (result-pos first-r))))
|
||||
(make-ok
|
||||
(cons (result-value first-r) (result-value rest-r))
|
||||
(result-pos rest-r)))
|
||||
first-r)))))
|
||||
|
||||
(define
|
||||
sep-by
|
||||
(fn
|
||||
(parser sep)
|
||||
(alt
|
||||
(list (sep-by1 parser sep) (fn (input pos) (make-ok (list) pos))))))
|
||||
|
||||
(define
|
||||
skip-left
|
||||
(fn
|
||||
(skip keep)
|
||||
(fmap (fn (results) (nth results 1)) (seq (list skip keep)))))
|
||||
|
||||
(define
|
||||
skip-right
|
||||
(fn
|
||||
(keep skip)
|
||||
(fmap (fn (results) (first results)) (seq (list keep skip)))))
|
||||
|
||||
(define
|
||||
not-followed-by
|
||||
(fn
|
||||
(parser)
|
||||
(fn
|
||||
(input pos)
|
||||
(let
|
||||
((r (parser input pos)))
|
||||
(if (ok? r) (make-fail "not followed by" pos) (make-ok nil pos))))))
|
||||
|
||||
(define
|
||||
look-ahead
|
||||
(fn
|
||||
(parser)
|
||||
(fn
|
||||
(input pos)
|
||||
(let
|
||||
((r (parser input pos)))
|
||||
(if (ok? r) (make-ok (result-value r) pos) r)))))
|
||||
|
||||
;; ── Layer 5: Character class parsers ──────────────────────────────
|
||||
|
||||
(define digit (satisfy (fn (c) (and (>= c "0") (<= c "9"))) "digit"))
|
||||
|
||||
(define
|
||||
letter
|
||||
(satisfy
|
||||
(fn
|
||||
(c)
|
||||
(or (and (>= c "a") (<= c "z")) (and (>= c "A") (<= c "Z"))))
|
||||
"letter"))
|
||||
|
||||
(define alpha-num (alt2 letter digit))
|
||||
|
||||
(define
|
||||
whitespace-char
|
||||
(satisfy
|
||||
(fn (c) (or (= c " ") (= c "\t") (= c "\n") (= c "\r")))
|
||||
"whitespace"))
|
||||
|
||||
(define skip-spaces (skip-many whitespace-char))
|
||||
|
||||
;; ── Layer 6: Literal parsers ──────────────────────────────────────
|
||||
|
||||
(define
|
||||
number-literal
|
||||
(fn
|
||||
(input pos)
|
||||
(let
|
||||
((start pos) (cur pos) (input-len (len input)))
|
||||
(when
|
||||
(and (< cur input-len) (= (nth input cur) "-"))
|
||||
(set! cur (+ cur 1)))
|
||||
(let
|
||||
((digit-start cur))
|
||||
(define
|
||||
scan-digits
|
||||
(fn
|
||||
()
|
||||
(when
|
||||
(and
|
||||
(< cur input-len)
|
||||
(>= (nth input cur) "0")
|
||||
(<= (nth input cur) "9"))
|
||||
(set! cur (+ cur 1))
|
||||
(scan-digits))))
|
||||
(scan-digits)
|
||||
(if
|
||||
(= cur digit-start)
|
||||
(make-fail "number" pos)
|
||||
(do
|
||||
(when
|
||||
(and (< cur input-len) (= (nth input cur) "."))
|
||||
(set! cur (+ cur 1))
|
||||
(scan-digits))
|
||||
(make-ok (parse-float (slice input start cur)) cur)))))))
|
||||
|
||||
(define
|
||||
string-literal
|
||||
(fn
|
||||
(input pos)
|
||||
(if
|
||||
(and (< pos (len input)) (= (nth input pos) "\""))
|
||||
(let
|
||||
((cur (+ pos 1)) (input-len (len input)) (chars (list)))
|
||||
(define
|
||||
loop
|
||||
(fn
|
||||
()
|
||||
(if
|
||||
(>= cur input-len)
|
||||
(make-fail "closing quote" cur)
|
||||
(let
|
||||
((ch (nth input cur)))
|
||||
(cond
|
||||
(= ch "\"")
|
||||
(make-ok (join "" chars) (+ cur 1))
|
||||
(= ch "\\")
|
||||
(if
|
||||
(>= (+ cur 1) input-len)
|
||||
(make-fail "escape character" cur)
|
||||
(let
|
||||
((next (nth input (+ cur 1))))
|
||||
(cond
|
||||
(= next "n")
|
||||
(do
|
||||
(append! chars "\n")
|
||||
(set! cur (+ cur 2))
|
||||
(loop))
|
||||
(= next "t")
|
||||
(do
|
||||
(append! chars "\t")
|
||||
(set! cur (+ cur 2))
|
||||
(loop))
|
||||
(= next "\\")
|
||||
(do
|
||||
(append! chars "\\")
|
||||
(set! cur (+ cur 2))
|
||||
(loop))
|
||||
(= next "\"")
|
||||
(do
|
||||
(append! chars "\"")
|
||||
(set! cur (+ cur 2))
|
||||
(loop))
|
||||
:else (do
|
||||
(append! chars next)
|
||||
(set! cur (+ cur 2))
|
||||
(loop)))))
|
||||
:else (do (append! chars ch) (set! cur (+ cur 1)) (loop)))))))
|
||||
(loop))
|
||||
(make-fail "string" pos))))
|
||||
|
||||
(define
|
||||
identifier
|
||||
(let
|
||||
((id-start (satisfy (fn (c) (or (and (>= c "a") (<= c "z")) (and (>= c "A") (<= c "Z")) (= c "_") (= c "~"))) "identifier start"))
|
||||
(id-char
|
||||
(satisfy
|
||||
(fn
|
||||
(c)
|
||||
(or
|
||||
(and (>= c "a") (<= c "z"))
|
||||
(and (>= c "A") (<= c "Z"))
|
||||
(and (>= c "0") (<= c "9"))
|
||||
(= c "-")
|
||||
(= c "_")
|
||||
(= c "?")
|
||||
(= c "!")
|
||||
(= c "~")
|
||||
(= c "/")))
|
||||
"identifier char")))
|
||||
(fmap
|
||||
(fn (pair) (str (first pair) (join "" (nth pair 1))))
|
||||
(seq (list id-start (many id-char))))))
|
||||
|
||||
;; ── Layer 7: Run parser ───────────────────────────────────────────
|
||||
|
||||
(define run-parser (fn (parser input) (parser input 0)))
|
||||
|
||||
;; ── Layer 8: SX tokenizer ─────────────────────────────────────────
|
||||
|
||||
(define
|
||||
sx-comment
|
||||
(fn
|
||||
(input pos)
|
||||
(if
|
||||
(and (< pos (len input)) (= (nth input pos) ";"))
|
||||
(let
|
||||
((cur (+ pos 1)) (input-len (len input)))
|
||||
(define
|
||||
loop
|
||||
(fn
|
||||
()
|
||||
(if
|
||||
(or (>= cur input-len) (= (nth input cur) "\n"))
|
||||
(make-ok {:type "comment"} cur)
|
||||
(do (set! cur (+ cur 1)) (loop)))))
|
||||
(loop))
|
||||
(make-fail "comment" pos))))
|
||||
|
||||
(define
|
||||
sx-keyword
|
||||
(parse-bind
|
||||
(parse-char ":")
|
||||
(fn (colon) (fmap (fn (name) {:value (str ":" name) :type "keyword"}) identifier))))
|
||||
|
||||
(define
|
||||
sx-symbol
|
||||
(let
|
||||
((sym-char (satisfy (fn (c) (or (and (>= c "a") (<= c "z")) (and (>= c "A") (<= c "Z")) (and (>= c "0") (<= c "9")) (= c "-") (= c "_") (= c "?") (= c "!") (= c "+") (= c "*") (= c "/") (= c "<") (= c ">") (= c "=") (= c "~") (= c "&") (= c ".") (= c "#"))) "symbol char")))
|
||||
(fmap (fn (chars) {:value (join "" chars) :type "symbol"}) (many1 sym-char))))
|
||||
|
||||
(define sx-number (fmap (fn (n) {:value n :type "number"}) number-literal))
|
||||
|
||||
(define sx-string (fmap (fn (s) {:value s :type "string"}) string-literal))
|
||||
|
||||
(define
|
||||
sx-token
|
||||
(alt
|
||||
(list
|
||||
sx-comment
|
||||
sx-number
|
||||
sx-string
|
||||
sx-keyword
|
||||
(fmap (fn (c) {:type "open-paren"}) (parse-char "("))
|
||||
(fmap (fn (c) {:type "close-paren"}) (parse-char ")"))
|
||||
(fmap (fn (c) {:type "open-brace"}) (parse-char "{"))
|
||||
(fmap (fn (c) {:type "close-brace"}) (parse-char "}"))
|
||||
(fmap (fn (c) {:type "quote"}) (parse-char "'"))
|
||||
sx-symbol)))
|
||||
|
||||
(define
|
||||
sx-tokenize
|
||||
(fmap
|
||||
(fn
|
||||
(tokens)
|
||||
(filter (fn (t) (not (= (get t "type") "comment"))) tokens))
|
||||
(skip-left skip-spaces (many (skip-right sx-token skip-spaces)))))
|
||||
424
spec/tests/test-parser-combinators.sx
Normal file
424
spec/tests/test-parser-combinators.sx
Normal file
@@ -0,0 +1,424 @@
|
||||
;; Parser Combinator tests
|
||||
|
||||
(defsuite
|
||||
"pc-primitives"
|
||||
(deftest
|
||||
"satisfy matches"
|
||||
(let
|
||||
((r (run-parser (satisfy (fn (c) (= c "a")) "a") "abc")))
|
||||
(assert (ok? r))
|
||||
(assert= "a" (result-value r))
|
||||
(assert= 1 (result-pos r))))
|
||||
(deftest
|
||||
"satisfy fails"
|
||||
(let
|
||||
((r (run-parser (satisfy (fn (c) (= c "x")) "x") "abc")))
|
||||
(assert (not (ok? r)))
|
||||
(assert= 0 (result-pos r))))
|
||||
(deftest
|
||||
"satisfy fails at eof"
|
||||
(let
|
||||
((r (run-parser (satisfy (fn (c) true) "char") "")))
|
||||
(assert (not (ok? r)))))
|
||||
(deftest
|
||||
"parse-char matches"
|
||||
(let
|
||||
((r (run-parser (parse-char "b") "bc")))
|
||||
(assert (ok? r))
|
||||
(assert= "b" (result-value r))))
|
||||
(deftest
|
||||
"parse-char fails"
|
||||
(let
|
||||
((r (run-parser (parse-char "x") "bc")))
|
||||
(assert (not (ok? r)))))
|
||||
(deftest
|
||||
"any-char matches"
|
||||
(let
|
||||
((r (run-parser any-char "hello")))
|
||||
(assert (ok? r))
|
||||
(assert= "h" (result-value r))))
|
||||
(deftest
|
||||
"any-char fails on empty"
|
||||
(assert (not (ok? (run-parser any-char "")))))
|
||||
(deftest
|
||||
"parse-string matches"
|
||||
(let
|
||||
((r (run-parser (parse-string "hello") "hello world")))
|
||||
(assert (ok? r))
|
||||
(assert= "hello" (result-value r))
|
||||
(assert= 5 (result-pos r))))
|
||||
(deftest
|
||||
"parse-string fails on partial"
|
||||
(assert (not (ok? (run-parser (parse-string "hello") "hel")))))
|
||||
(deftest
|
||||
"parse-string fails on mismatch"
|
||||
(assert (not (ok? (run-parser (parse-string "hello") "world")))))
|
||||
(deftest "eof succeeds at end" (assert (ok? (run-parser eof ""))))
|
||||
(deftest
|
||||
"eof fails with remaining"
|
||||
(assert (not (ok? (run-parser eof "x"))))))
|
||||
|
||||
(defsuite
|
||||
"pc-core-combinators"
|
||||
(deftest
|
||||
"fmap transforms value"
|
||||
(let
|
||||
((r (run-parser (fmap upper (parse-char "a")) "abc")))
|
||||
(assert (ok? r))
|
||||
(assert= "A" (result-value r))))
|
||||
(deftest
|
||||
"fmap propagates failure"
|
||||
(assert (not (ok? (run-parser (fmap upper (parse-char "x")) "abc")))))
|
||||
(deftest
|
||||
"seq two parsers"
|
||||
(let
|
||||
((r (run-parser (seq (list (parse-char "a") (parse-char "b"))) "abc")))
|
||||
(assert (ok? r))
|
||||
(assert= (list "a" "b") (result-value r))
|
||||
(assert= 2 (result-pos r))))
|
||||
(deftest
|
||||
"seq fails on second"
|
||||
(assert
|
||||
(not
|
||||
(ok?
|
||||
(run-parser (seq (list (parse-char "a") (parse-char "x"))) "abc")))))
|
||||
(deftest
|
||||
"seq empty list"
|
||||
(let
|
||||
((r (run-parser (seq (list)) "abc")))
|
||||
(assert (ok? r))
|
||||
(assert= (list) (result-value r))
|
||||
(assert= 0 (result-pos r))))
|
||||
(deftest
|
||||
"alt first succeeds"
|
||||
(let
|
||||
((r (run-parser (alt (list (parse-char "a") (parse-char "b"))) "abc")))
|
||||
(assert (ok? r))
|
||||
(assert= "a" (result-value r))))
|
||||
(deftest
|
||||
"alt second succeeds"
|
||||
(let
|
||||
((r (run-parser (alt (list (parse-char "x") (parse-char "a"))) "abc")))
|
||||
(assert (ok? r))
|
||||
(assert= "a" (result-value r))))
|
||||
(deftest
|
||||
"alt all fail"
|
||||
(assert
|
||||
(not
|
||||
(ok?
|
||||
(run-parser (alt (list (parse-char "x") (parse-char "y"))) "abc")))))
|
||||
(deftest
|
||||
"label overrides error"
|
||||
(let
|
||||
((r (run-parser (label "my-thing" (parse-char "x")) "abc")))
|
||||
(assert (not (ok? r)))
|
||||
(assert= "my-thing" (result-expected r))))
|
||||
(deftest
|
||||
"parse-bind chains"
|
||||
(let
|
||||
((p (parse-bind (parse-char "a") (fn (ch) (parse-char "b")))))
|
||||
(let
|
||||
((r (run-parser p "abc")))
|
||||
(assert (ok? r))
|
||||
(assert= "b" (result-value r))
|
||||
(assert= 2 (result-pos r)))))
|
||||
(deftest
|
||||
"lazy-parser defers"
|
||||
(let
|
||||
((p (lazy-parser (fn () (parse-char "a")))))
|
||||
(assert (ok? (run-parser p "abc"))))))
|
||||
|
||||
(defsuite
|
||||
"pc-repetition"
|
||||
(deftest
|
||||
"many zero matches"
|
||||
(let
|
||||
((r (run-parser (many (parse-char "x")) "abc")))
|
||||
(assert (ok? r))
|
||||
(assert= (list) (result-value r))
|
||||
(assert= 0 (result-pos r))))
|
||||
(deftest
|
||||
"many multiple matches"
|
||||
(let
|
||||
((r (run-parser (many (parse-char "a")) "aaab")))
|
||||
(assert (ok? r))
|
||||
(assert= (list "a" "a" "a") (result-value r))
|
||||
(assert= 3 (result-pos r))))
|
||||
(deftest
|
||||
"many1 one match"
|
||||
(let
|
||||
((r (run-parser (many1 (parse-char "a")) "ab")))
|
||||
(assert (ok? r))
|
||||
(assert= (list "a") (result-value r))))
|
||||
(deftest
|
||||
"many1 fails on zero"
|
||||
(assert (not (ok? (run-parser (many1 (parse-char "a")) "bc")))))
|
||||
(deftest
|
||||
"optional present"
|
||||
(let
|
||||
((r (run-parser (optional (parse-char "a")) "abc")))
|
||||
(assert (ok? r))
|
||||
(assert= "a" (result-value r))))
|
||||
(deftest
|
||||
"optional absent"
|
||||
(let
|
||||
((r (run-parser (optional (parse-char "x")) "abc")))
|
||||
(assert (ok? r))
|
||||
(assert= nil (result-value r))
|
||||
(assert= 0 (result-pos r))))
|
||||
(deftest
|
||||
"skip-many consumes"
|
||||
(let
|
||||
((r (run-parser (skip-many (parse-char " ")) " abc")))
|
||||
(assert (ok? r))
|
||||
(assert= 3 (result-pos r)))))
|
||||
|
||||
(defsuite
|
||||
"pc-structural"
|
||||
(deftest
|
||||
"between parens"
|
||||
(let
|
||||
((r (run-parser (between (parse-char "(") (parse-char ")") (parse-char "x")) "(x)")))
|
||||
(assert (ok? r))
|
||||
(assert= "x" (result-value r))))
|
||||
(deftest
|
||||
"sep-by with items"
|
||||
(let
|
||||
((r (run-parser (sep-by (parse-char "a") (parse-char ",")) "a,a,a")))
|
||||
(assert (ok? r))
|
||||
(assert= (list "a" "a" "a") (result-value r))))
|
||||
(deftest
|
||||
"sep-by empty"
|
||||
(let
|
||||
((r (run-parser (sep-by (parse-char "a") (parse-char ",")) "xyz")))
|
||||
(assert (ok? r))
|
||||
(assert= (list) (result-value r))))
|
||||
(deftest
|
||||
"sep-by1 with items"
|
||||
(let
|
||||
((r (run-parser (sep-by1 digit (parse-char ",")) "1,2,3")))
|
||||
(assert (ok? r))
|
||||
(assert= (list "1" "2" "3") (result-value r))))
|
||||
(deftest
|
||||
"sep-by1 fails on empty"
|
||||
(assert
|
||||
(not (ok? (run-parser (sep-by1 digit (parse-char ",")) "xyz")))))
|
||||
(deftest
|
||||
"skip-left discards first"
|
||||
(let
|
||||
((r (run-parser (skip-left (parse-char " ") (parse-char "x")) " x")))
|
||||
(assert (ok? r))
|
||||
(assert= "x" (result-value r))))
|
||||
(deftest
|
||||
"skip-right discards second"
|
||||
(let
|
||||
((r (run-parser (skip-right (parse-char "x") (parse-char " ")) "x ")))
|
||||
(assert (ok? r))
|
||||
(assert= "x" (result-value r))))
|
||||
(deftest
|
||||
"not-followed-by succeeds on failure"
|
||||
(let
|
||||
((r (run-parser (not-followed-by (parse-char "x")) "abc")))
|
||||
(assert (ok? r))
|
||||
(assert= 0 (result-pos r))))
|
||||
(deftest
|
||||
"not-followed-by fails on success"
|
||||
(assert
|
||||
(not (ok? (run-parser (not-followed-by (parse-char "a")) "abc")))))
|
||||
(deftest
|
||||
"look-ahead preserves position"
|
||||
(let
|
||||
((r (run-parser (look-ahead (parse-char "a")) "abc")))
|
||||
(assert (ok? r))
|
||||
(assert= "a" (result-value r))
|
||||
(assert= 0 (result-pos r)))))
|
||||
|
||||
(defsuite
|
||||
"pc-char-classes"
|
||||
(deftest "digit matches" (assert (ok? (run-parser digit "5"))))
|
||||
(deftest
|
||||
"digit fails on letter"
|
||||
(assert (not (ok? (run-parser digit "a")))))
|
||||
(deftest "letter matches" (assert (ok? (run-parser letter "a"))))
|
||||
(deftest
|
||||
"letter fails on digit"
|
||||
(assert (not (ok? (run-parser letter "5")))))
|
||||
(deftest
|
||||
"alpha-num matches digit"
|
||||
(assert (ok? (run-parser alpha-num "5"))))
|
||||
(deftest
|
||||
"alpha-num matches letter"
|
||||
(assert (ok? (run-parser alpha-num "z"))))
|
||||
(deftest
|
||||
"skip-spaces skips"
|
||||
(let
|
||||
((r (run-parser skip-spaces " \t\nabc")))
|
||||
(assert (ok? r))
|
||||
(assert= 4 (result-pos r))))
|
||||
(deftest
|
||||
"skip-spaces on no spaces"
|
||||
(let
|
||||
((r (run-parser skip-spaces "abc")))
|
||||
(assert (ok? r))
|
||||
(assert= 0 (result-pos r)))))
|
||||
|
||||
(defsuite
|
||||
"pc-literals"
|
||||
(deftest
|
||||
"number integer"
|
||||
(let
|
||||
((r (run-parser number-literal "42abc")))
|
||||
(assert (ok? r))
|
||||
(assert= 42 (result-value r))
|
||||
(assert= 2 (result-pos r))))
|
||||
(deftest
|
||||
"number decimal"
|
||||
(let
|
||||
((r (run-parser number-literal "3.14rest")))
|
||||
(assert (ok? r))
|
||||
(assert= 3.14 (result-value r))))
|
||||
(deftest
|
||||
"number negative"
|
||||
(let
|
||||
((r (run-parser number-literal "-7x")))
|
||||
(assert (ok? r))
|
||||
(assert= -7 (result-value r))))
|
||||
(deftest
|
||||
"number fails on non-digit"
|
||||
(assert (not (ok? (run-parser number-literal "abc")))))
|
||||
(deftest
|
||||
"string-literal basic"
|
||||
(let
|
||||
((r (run-parser string-literal "\"hello\" rest")))
|
||||
(assert (ok? r))
|
||||
(assert= "hello" (result-value r))))
|
||||
(deftest
|
||||
"string-literal with escapes"
|
||||
(let
|
||||
((r (run-parser string-literal "\"a\\nb\"")))
|
||||
(assert (ok? r))
|
||||
(assert= "a\nb" (result-value r))))
|
||||
(deftest
|
||||
"string-literal fails without quote"
|
||||
(assert (not (ok? (run-parser string-literal "hello")))))
|
||||
(deftest
|
||||
"identifier basic"
|
||||
(let
|
||||
((r (run-parser identifier "hello-world rest")))
|
||||
(assert (ok? r))
|
||||
(assert= "hello-world" (result-value r))))
|
||||
(deftest
|
||||
"identifier with special chars"
|
||||
(let
|
||||
((r (run-parser identifier "empty? x")))
|
||||
(assert (ok? r))
|
||||
(assert= "empty?" (result-value r)))))
|
||||
|
||||
(defsuite
|
||||
"pc-sx-tokenizer"
|
||||
(deftest
|
||||
"tokenize number"
|
||||
(let
|
||||
((tokens (result-value (run-parser sx-tokenize "42"))))
|
||||
(assert= 1 (len tokens))
|
||||
(assert= "number" (get (first tokens) "type"))
|
||||
(assert= 42 (get (first tokens) "value"))))
|
||||
(deftest
|
||||
"tokenize string"
|
||||
(let
|
||||
((tokens (result-value (run-parser sx-tokenize "\"hi\""))))
|
||||
(assert= 1 (len tokens))
|
||||
(assert= "string" (get (first tokens) "type"))
|
||||
(assert= "hi" (get (first tokens) "value"))))
|
||||
(deftest
|
||||
"tokenize keyword"
|
||||
(let
|
||||
((tokens (result-value (run-parser sx-tokenize ":ok"))))
|
||||
(assert= 1 (len tokens))
|
||||
(assert= "keyword" (get (first tokens) "type"))
|
||||
(assert= ":ok" (get (first tokens) "value"))))
|
||||
(deftest
|
||||
"tokenize symbol"
|
||||
(let
|
||||
((tokens (result-value (run-parser sx-tokenize "define"))))
|
||||
(assert= 1 (len tokens))
|
||||
(assert= "symbol" (get (first tokens) "type"))
|
||||
(assert= "define" (get (first tokens) "value"))))
|
||||
(deftest
|
||||
"tokenize simple expression"
|
||||
(let
|
||||
((tokens (result-value (run-parser sx-tokenize "(+ 1 2)"))))
|
||||
(assert= 5 (len tokens))
|
||||
(assert= "open-paren" (get (first tokens) "type"))
|
||||
(assert= "symbol" (get (nth tokens 1) "type"))
|
||||
(assert= "+" (get (nth tokens 1) "value"))
|
||||
(assert= "number" (get (nth tokens 2) "type"))
|
||||
(assert= "close-paren" (get (nth tokens 4) "type"))))
|
||||
(deftest
|
||||
"tokenize define expression"
|
||||
(let
|
||||
((tokens (result-value (run-parser sx-tokenize "(define x 42)"))))
|
||||
(assert= 5 (len tokens))
|
||||
(assert= "symbol" (get (nth tokens 1) "type"))
|
||||
(assert= "define" (get (nth tokens 1) "value"))
|
||||
(assert= "number" (get (nth tokens 3) "type"))
|
||||
(assert= 42 (get (nth tokens 3) "value"))))
|
||||
(deftest
|
||||
"tokenize dict literal"
|
||||
(let
|
||||
((tokens (result-value (run-parser sx-tokenize "{:ok true :value 42}"))))
|
||||
(assert= 6 (len tokens))
|
||||
(assert= "open-brace" (get (first tokens) "type"))
|
||||
(assert= "keyword" (get (nth tokens 1) "type"))
|
||||
(assert= "close-brace" (get (nth tokens 5) "type"))))
|
||||
(deftest
|
||||
"tokenize with comment"
|
||||
(let
|
||||
((tokens (result-value (run-parser sx-tokenize ";; comment\n42"))))
|
||||
(assert= 1 (len tokens))
|
||||
(assert= "number" (get (first tokens) "type"))))
|
||||
(deftest
|
||||
"tokenize nested expression"
|
||||
(let
|
||||
((tokens (result-value (run-parser sx-tokenize "(+ 1 (* 2 3))"))))
|
||||
(assert= 9 (len tokens))
|
||||
(assert= "open-paren" (get (first tokens) "type"))
|
||||
(assert= "open-paren" (get (nth tokens 3) "type"))
|
||||
(assert= "close-paren" (get (nth tokens 7) "type"))
|
||||
(assert= "close-paren" (get (nth tokens 8) "type")))))
|
||||
|
||||
(defsuite
|
||||
"pc-recursive"
|
||||
(deftest
|
||||
"lazy-parser enables recursion"
|
||||
(define
|
||||
nested-a
|
||||
(lazy-parser
|
||||
(fn
|
||||
()
|
||||
(alt
|
||||
(list
|
||||
(parse-char "a")
|
||||
(fmap
|
||||
(fn (parts) (nth parts 1))
|
||||
(seq (list (parse-char "(") nested-a (parse-char ")")))))))))
|
||||
(assert (ok? (run-parser nested-a "a")))
|
||||
(assert= "a" (result-value (run-parser nested-a "a")))
|
||||
(assert= "a" (result-value (run-parser nested-a "(a)")))
|
||||
(assert= "a" (result-value (run-parser nested-a "((a))"))))
|
||||
(deftest
|
||||
"matched parens depth"
|
||||
(define
|
||||
paren-depth
|
||||
(lazy-parser
|
||||
(fn
|
||||
()
|
||||
(alt
|
||||
(list
|
||||
(fmap
|
||||
(fn (parts) (+ 1 (nth parts 1)))
|
||||
(seq (list (parse-char "(") paren-depth (parse-char ")"))))
|
||||
(fn (input pos) (make-ok 0 pos)))))))
|
||||
(assert= 0 (result-value (run-parser paren-depth "")))
|
||||
(assert= 1 (result-value (run-parser paren-depth "()")))
|
||||
(assert= 3 (result-value (run-parser paren-depth "((()))")))))
|
||||
Reference in New Issue
Block a user