From 516f9c7186c257f987111bc6c09d077911ca7a14 Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 5 Apr 2026 20:14:51 +0000 Subject: [PATCH] =?UTF-8?q?Step=2012:=20Parser=20combinator=20library=20?= =?UTF-8?q?=E2=80=94=20pure=20SX,=2068=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit lib/parser-combinators.sx — 46 definitions in 8 layers: 0. Result constructors (make-ok, make-fail, ok?, result-value, etc.) 1. Primitives (satisfy, parse-char, any-char, parse-string, eof) 2. Core combinators (fmap, parse-bind, seq, alt, label, lazy-parser) 3. Repetition (many, many1, optional, skip-many) 4. Structural (between, sep-by, sep-by1, skip-left, skip-right, not-followed-by, look-ahead) 5. Character classes (digit, letter, alpha-num, whitespace, skip-spaces) 6. Literal parsers (number-literal, string-literal, identifier) 7. Run function (run-parser) 8. SX tokenizer (sx-comment, sx-keyword, sx-symbol, sx-number, sx-string, sx-token, sx-tokenize) Self-tests by tokenizing SX: (define x 42), {:ok true}, (+ 1 (* 2 3)), comments, negative numbers, nested parens, recursive grammars. No evaluator changes. Pure HO functions + thunks for lazy recursion. 2868/2868 tests, zero failures. Co-Authored-By: Claude Opus 4.6 (1M context) --- hosts/ocaml/bin/run_tests.ml | 1 + lib/parser-combinators.sx | 485 ++++++++++++++++++++++++++ spec/tests/test-parser-combinators.sx | 424 ++++++++++++++++++++++ 3 files changed, 910 insertions(+) create mode 100644 lib/parser-combinators.sx create mode 100644 spec/tests/test-parser-combinators.sx diff --git a/hosts/ocaml/bin/run_tests.ml b/hosts/ocaml/bin/run_tests.ml index 0462d382..b52ce25e 100644 --- a/hosts/ocaml/bin/run_tests.ml +++ b/hosts/ocaml/bin/run_tests.ml @@ -1352,6 +1352,7 @@ let run_spec_tests env test_files = load_module "signals.sx" web_dir; (* web extensions *) load_module "freeze.sx" lib_dir; load_module "content.sx" lib_dir; + load_module "parser-combinators.sx" lib_dir; load_module "types.sx" lib_dir; load_module "sx-swap.sx" lib_dir; (* Shared templates: TW styling engine *) diff --git a/lib/parser-combinators.sx b/lib/parser-combinators.sx new file mode 100644 index 00000000..b44b4e86 --- /dev/null +++ b/lib/parser-combinators.sx @@ -0,0 +1,485 @@ +;; Parser Combinator Library — Pure SX +;; +;; A parser is (fn (input pos) result) where: +;; Success: {:ok true :value val :pos new-pos} +;; Failure: {:ok false :expected desc :pos pos} +;; +;; Combinators compose parsers: seq, alt, many, sep-by, between, etc. +;; Recursive grammars use (lazy-parser (fn () ...)) thunks. + +;; ── Layer 0: Result constructors ────────────────────────────────── + +(define make-ok (fn (val pos) {:pos pos :ok true :value val})) +(define make-fail (fn (expected pos) {:pos pos :ok false :expected expected})) +(define ok? (fn (r) (get r "ok"))) +(define result-value (fn (r) (get r "value"))) +(define result-pos (fn (r) (get r "pos"))) +(define result-expected (fn (r) (get r "expected"))) + +;; ── Layer 1: Primitive parsers ──────────────────────────────────── + +(define + satisfy + (fn + (pred desc) + (fn + (input pos) + (if + (< pos (len input)) + (let + ((ch (nth input pos))) + (if (pred ch) (make-ok ch (+ pos 1)) (make-fail desc pos))) + (make-fail desc pos))))) + +(define + parse-char + (fn (ch) (satisfy (fn (c) (= c ch)) (str "'" ch "'")))) + +(define any-char (satisfy (fn (c) true) "any character")) + +(define + parse-string + (fn + (target) + (let + ((target-len (len target))) + (fn + (input pos) + (if + (<= (+ pos target-len) (len input)) + (if + (= (slice input pos (+ pos target-len)) target) + (make-ok target (+ pos target-len)) + (make-fail (str "\"" target "\"") pos)) + (make-fail (str "\"" target "\"") pos)))))) + +(define + eof + (fn + (input pos) + (if + (>= pos (len input)) + (make-ok nil pos) + (make-fail "end of input" pos)))) + +;; ── Layer 2: Core combinators ───────────────────────────────────── + +(define + fmap + (fn + (f parser) + (fn + (input pos) + (let + ((r (parser input pos))) + (if (ok? r) (make-ok (f (result-value r)) (result-pos r)) r))))) + +(define + parse-bind + (fn + (parser f) + (fn + (input pos) + (let + ((r (parser input pos))) + (if (ok? r) ((f (result-value r)) input (result-pos r)) r))))) + +(define + seq + (fn + (parsers) + (fn + (input pos) + (let + ((results (list)) (cur-pos pos) (failed nil)) + (for-each + (fn + (p) + (when + (not failed) + (let + ((r (p input cur-pos))) + (if + (ok? r) + (do + (append! results (result-value r)) + (set! cur-pos (result-pos r))) + (set! failed r))))) + parsers) + (if failed failed (make-ok results cur-pos)))))) + +(define seq2 (fn (p1 p2) (seq (list p1 p2)))) + +(define + alt + (fn + (parsers) + (fn + (input pos) + (let + ((best-fail nil)) + (define + loop + (fn + (ps) + (if + (empty? ps) + (or best-fail (make-fail "no alternatives" pos)) + (let + ((r ((first ps) input pos))) + (if + (ok? r) + r + (do + (when + (or + (not best-fail) + (> (result-pos r) (result-pos best-fail))) + (set! best-fail r)) + (loop (rest ps)))))))) + (loop parsers))))) + +(define alt2 (fn (p1 p2) (alt (list p1 p2)))) + +(define + label + (fn + (name parser) + (fn + (input pos) + (let ((r (parser input pos))) (if (ok? r) r (make-fail name pos)))))) + +(define lazy-parser (fn (thunk) (fn (input pos) ((thunk) input pos)))) + +;; ── Layer 3: Repetition combinators ─────────────────────────────── + +(define + many + (fn + (parser) + (fn + (input pos) + (let + ((results (list)) (cur-pos pos)) + (define + loop + (fn + () + (let + ((r (parser input cur-pos))) + (if + (ok? r) + (do + (append! results (result-value r)) + (set! cur-pos (result-pos r)) + (loop)) + (make-ok results cur-pos))))) + (loop))))) + +(define + many1 + (fn + (parser) + (fn + (input pos) + (let + ((r (parser input pos))) + (if + (ok? r) + (let + ((rest-r ((many parser) input (result-pos r)))) + (make-ok + (cons (result-value r) (result-value rest-r)) + (result-pos rest-r))) + r))))) + +(define + optional + (fn + (parser) + (fn + (input pos) + (let ((r (parser input pos))) (if (ok? r) r (make-ok nil pos)))))) + +(define + skip-many + (fn + (parser) + (fn + (input pos) + (let + ((cur-pos pos)) + (define + loop + (fn + () + (let + ((r (parser input cur-pos))) + (if + (ok? r) + (do (set! cur-pos (result-pos r)) (loop)) + (make-ok nil cur-pos))))) + (loop))))) + +;; ── Layer 4: Structural combinators ─────────────────────────────── + +(define + between + (fn + (open close body) + (fmap (fn (results) (nth results 1)) (seq (list open body close))))) + +(define + sep-by1 + (fn + (parser sep) + (fn + (input pos) + (let + ((first-r (parser input pos))) + (if + (ok? first-r) + (let + ((rest-r ((many (fmap (fn (pair) (nth pair 1)) (seq (list sep parser)))) input (result-pos first-r)))) + (make-ok + (cons (result-value first-r) (result-value rest-r)) + (result-pos rest-r))) + first-r))))) + +(define + sep-by + (fn + (parser sep) + (alt + (list (sep-by1 parser sep) (fn (input pos) (make-ok (list) pos)))))) + +(define + skip-left + (fn + (skip keep) + (fmap (fn (results) (nth results 1)) (seq (list skip keep))))) + +(define + skip-right + (fn + (keep skip) + (fmap (fn (results) (first results)) (seq (list keep skip))))) + +(define + not-followed-by + (fn + (parser) + (fn + (input pos) + (let + ((r (parser input pos))) + (if (ok? r) (make-fail "not followed by" pos) (make-ok nil pos)))))) + +(define + look-ahead + (fn + (parser) + (fn + (input pos) + (let + ((r (parser input pos))) + (if (ok? r) (make-ok (result-value r) pos) r))))) + +;; ── Layer 5: Character class parsers ────────────────────────────── + +(define digit (satisfy (fn (c) (and (>= c "0") (<= c "9"))) "digit")) + +(define + letter + (satisfy + (fn + (c) + (or (and (>= c "a") (<= c "z")) (and (>= c "A") (<= c "Z")))) + "letter")) + +(define alpha-num (alt2 letter digit)) + +(define + whitespace-char + (satisfy + (fn (c) (or (= c " ") (= c "\t") (= c "\n") (= c "\r"))) + "whitespace")) + +(define skip-spaces (skip-many whitespace-char)) + +;; ── Layer 6: Literal parsers ────────────────────────────────────── + +(define + number-literal + (fn + (input pos) + (let + ((start pos) (cur pos) (input-len (len input))) + (when + (and (< cur input-len) (= (nth input cur) "-")) + (set! cur (+ cur 1))) + (let + ((digit-start cur)) + (define + scan-digits + (fn + () + (when + (and + (< cur input-len) + (>= (nth input cur) "0") + (<= (nth input cur) "9")) + (set! cur (+ cur 1)) + (scan-digits)))) + (scan-digits) + (if + (= cur digit-start) + (make-fail "number" pos) + (do + (when + (and (< cur input-len) (= (nth input cur) ".")) + (set! cur (+ cur 1)) + (scan-digits)) + (make-ok (parse-float (slice input start cur)) cur))))))) + +(define + string-literal + (fn + (input pos) + (if + (and (< pos (len input)) (= (nth input pos) "\"")) + (let + ((cur (+ pos 1)) (input-len (len input)) (chars (list))) + (define + loop + (fn + () + (if + (>= cur input-len) + (make-fail "closing quote" cur) + (let + ((ch (nth input cur))) + (cond + (= ch "\"") + (make-ok (join "" chars) (+ cur 1)) + (= ch "\\") + (if + (>= (+ cur 1) input-len) + (make-fail "escape character" cur) + (let + ((next (nth input (+ cur 1)))) + (cond + (= next "n") + (do + (append! chars "\n") + (set! cur (+ cur 2)) + (loop)) + (= next "t") + (do + (append! chars "\t") + (set! cur (+ cur 2)) + (loop)) + (= next "\\") + (do + (append! chars "\\") + (set! cur (+ cur 2)) + (loop)) + (= next "\"") + (do + (append! chars "\"") + (set! cur (+ cur 2)) + (loop)) + :else (do + (append! chars next) + (set! cur (+ cur 2)) + (loop))))) + :else (do (append! chars ch) (set! cur (+ cur 1)) (loop))))))) + (loop)) + (make-fail "string" pos)))) + +(define + identifier + (let + ((id-start (satisfy (fn (c) (or (and (>= c "a") (<= c "z")) (and (>= c "A") (<= c "Z")) (= c "_") (= c "~"))) "identifier start")) + (id-char + (satisfy + (fn + (c) + (or + (and (>= c "a") (<= c "z")) + (and (>= c "A") (<= c "Z")) + (and (>= c "0") (<= c "9")) + (= c "-") + (= c "_") + (= c "?") + (= c "!") + (= c "~") + (= c "/"))) + "identifier char"))) + (fmap + (fn (pair) (str (first pair) (join "" (nth pair 1)))) + (seq (list id-start (many id-char)))))) + +;; ── Layer 7: Run parser ─────────────────────────────────────────── + +(define run-parser (fn (parser input) (parser input 0))) + +;; ── Layer 8: SX tokenizer ───────────────────────────────────────── + +(define + sx-comment + (fn + (input pos) + (if + (and (< pos (len input)) (= (nth input pos) ";")) + (let + ((cur (+ pos 1)) (input-len (len input))) + (define + loop + (fn + () + (if + (or (>= cur input-len) (= (nth input cur) "\n")) + (make-ok {:type "comment"} cur) + (do (set! cur (+ cur 1)) (loop))))) + (loop)) + (make-fail "comment" pos)))) + +(define + sx-keyword + (parse-bind + (parse-char ":") + (fn (colon) (fmap (fn (name) {:value (str ":" name) :type "keyword"}) identifier)))) + +(define + sx-symbol + (let + ((sym-char (satisfy (fn (c) (or (and (>= c "a") (<= c "z")) (and (>= c "A") (<= c "Z")) (and (>= c "0") (<= c "9")) (= c "-") (= c "_") (= c "?") (= c "!") (= c "+") (= c "*") (= c "/") (= c "<") (= c ">") (= c "=") (= c "~") (= c "&") (= c ".") (= c "#"))) "symbol char"))) + (fmap (fn (chars) {:value (join "" chars) :type "symbol"}) (many1 sym-char)))) + +(define sx-number (fmap (fn (n) {:value n :type "number"}) number-literal)) + +(define sx-string (fmap (fn (s) {:value s :type "string"}) string-literal)) + +(define + sx-token + (alt + (list + sx-comment + sx-number + sx-string + sx-keyword + (fmap (fn (c) {:type "open-paren"}) (parse-char "(")) + (fmap (fn (c) {:type "close-paren"}) (parse-char ")")) + (fmap (fn (c) {:type "open-brace"}) (parse-char "{")) + (fmap (fn (c) {:type "close-brace"}) (parse-char "}")) + (fmap (fn (c) {:type "quote"}) (parse-char "'")) + sx-symbol))) + +(define + sx-tokenize + (fmap + (fn + (tokens) + (filter (fn (t) (not (= (get t "type") "comment"))) tokens)) + (skip-left skip-spaces (many (skip-right sx-token skip-spaces))))) \ No newline at end of file diff --git a/spec/tests/test-parser-combinators.sx b/spec/tests/test-parser-combinators.sx new file mode 100644 index 00000000..19cdde5a --- /dev/null +++ b/spec/tests/test-parser-combinators.sx @@ -0,0 +1,424 @@ +;; Parser Combinator tests + +(defsuite + "pc-primitives" + (deftest + "satisfy matches" + (let + ((r (run-parser (satisfy (fn (c) (= c "a")) "a") "abc"))) + (assert (ok? r)) + (assert= "a" (result-value r)) + (assert= 1 (result-pos r)))) + (deftest + "satisfy fails" + (let + ((r (run-parser (satisfy (fn (c) (= c "x")) "x") "abc"))) + (assert (not (ok? r))) + (assert= 0 (result-pos r)))) + (deftest + "satisfy fails at eof" + (let + ((r (run-parser (satisfy (fn (c) true) "char") ""))) + (assert (not (ok? r))))) + (deftest + "parse-char matches" + (let + ((r (run-parser (parse-char "b") "bc"))) + (assert (ok? r)) + (assert= "b" (result-value r)))) + (deftest + "parse-char fails" + (let + ((r (run-parser (parse-char "x") "bc"))) + (assert (not (ok? r))))) + (deftest + "any-char matches" + (let + ((r (run-parser any-char "hello"))) + (assert (ok? r)) + (assert= "h" (result-value r)))) + (deftest + "any-char fails on empty" + (assert (not (ok? (run-parser any-char ""))))) + (deftest + "parse-string matches" + (let + ((r (run-parser (parse-string "hello") "hello world"))) + (assert (ok? r)) + (assert= "hello" (result-value r)) + (assert= 5 (result-pos r)))) + (deftest + "parse-string fails on partial" + (assert (not (ok? (run-parser (parse-string "hello") "hel"))))) + (deftest + "parse-string fails on mismatch" + (assert (not (ok? (run-parser (parse-string "hello") "world"))))) + (deftest "eof succeeds at end" (assert (ok? (run-parser eof "")))) + (deftest + "eof fails with remaining" + (assert (not (ok? (run-parser eof "x")))))) + +(defsuite + "pc-core-combinators" + (deftest + "fmap transforms value" + (let + ((r (run-parser (fmap upper (parse-char "a")) "abc"))) + (assert (ok? r)) + (assert= "A" (result-value r)))) + (deftest + "fmap propagates failure" + (assert (not (ok? (run-parser (fmap upper (parse-char "x")) "abc"))))) + (deftest + "seq two parsers" + (let + ((r (run-parser (seq (list (parse-char "a") (parse-char "b"))) "abc"))) + (assert (ok? r)) + (assert= (list "a" "b") (result-value r)) + (assert= 2 (result-pos r)))) + (deftest + "seq fails on second" + (assert + (not + (ok? + (run-parser (seq (list (parse-char "a") (parse-char "x"))) "abc"))))) + (deftest + "seq empty list" + (let + ((r (run-parser (seq (list)) "abc"))) + (assert (ok? r)) + (assert= (list) (result-value r)) + (assert= 0 (result-pos r)))) + (deftest + "alt first succeeds" + (let + ((r (run-parser (alt (list (parse-char "a") (parse-char "b"))) "abc"))) + (assert (ok? r)) + (assert= "a" (result-value r)))) + (deftest + "alt second succeeds" + (let + ((r (run-parser (alt (list (parse-char "x") (parse-char "a"))) "abc"))) + (assert (ok? r)) + (assert= "a" (result-value r)))) + (deftest + "alt all fail" + (assert + (not + (ok? + (run-parser (alt (list (parse-char "x") (parse-char "y"))) "abc"))))) + (deftest + "label overrides error" + (let + ((r (run-parser (label "my-thing" (parse-char "x")) "abc"))) + (assert (not (ok? r))) + (assert= "my-thing" (result-expected r)))) + (deftest + "parse-bind chains" + (let + ((p (parse-bind (parse-char "a") (fn (ch) (parse-char "b"))))) + (let + ((r (run-parser p "abc"))) + (assert (ok? r)) + (assert= "b" (result-value r)) + (assert= 2 (result-pos r))))) + (deftest + "lazy-parser defers" + (let + ((p (lazy-parser (fn () (parse-char "a"))))) + (assert (ok? (run-parser p "abc")))))) + +(defsuite + "pc-repetition" + (deftest + "many zero matches" + (let + ((r (run-parser (many (parse-char "x")) "abc"))) + (assert (ok? r)) + (assert= (list) (result-value r)) + (assert= 0 (result-pos r)))) + (deftest + "many multiple matches" + (let + ((r (run-parser (many (parse-char "a")) "aaab"))) + (assert (ok? r)) + (assert= (list "a" "a" "a") (result-value r)) + (assert= 3 (result-pos r)))) + (deftest + "many1 one match" + (let + ((r (run-parser (many1 (parse-char "a")) "ab"))) + (assert (ok? r)) + (assert= (list "a") (result-value r)))) + (deftest + "many1 fails on zero" + (assert (not (ok? (run-parser (many1 (parse-char "a")) "bc"))))) + (deftest + "optional present" + (let + ((r (run-parser (optional (parse-char "a")) "abc"))) + (assert (ok? r)) + (assert= "a" (result-value r)))) + (deftest + "optional absent" + (let + ((r (run-parser (optional (parse-char "x")) "abc"))) + (assert (ok? r)) + (assert= nil (result-value r)) + (assert= 0 (result-pos r)))) + (deftest + "skip-many consumes" + (let + ((r (run-parser (skip-many (parse-char " ")) " abc"))) + (assert (ok? r)) + (assert= 3 (result-pos r))))) + +(defsuite + "pc-structural" + (deftest + "between parens" + (let + ((r (run-parser (between (parse-char "(") (parse-char ")") (parse-char "x")) "(x)"))) + (assert (ok? r)) + (assert= "x" (result-value r)))) + (deftest + "sep-by with items" + (let + ((r (run-parser (sep-by (parse-char "a") (parse-char ",")) "a,a,a"))) + (assert (ok? r)) + (assert= (list "a" "a" "a") (result-value r)))) + (deftest + "sep-by empty" + (let + ((r (run-parser (sep-by (parse-char "a") (parse-char ",")) "xyz"))) + (assert (ok? r)) + (assert= (list) (result-value r)))) + (deftest + "sep-by1 with items" + (let + ((r (run-parser (sep-by1 digit (parse-char ",")) "1,2,3"))) + (assert (ok? r)) + (assert= (list "1" "2" "3") (result-value r)))) + (deftest + "sep-by1 fails on empty" + (assert + (not (ok? (run-parser (sep-by1 digit (parse-char ",")) "xyz"))))) + (deftest + "skip-left discards first" + (let + ((r (run-parser (skip-left (parse-char " ") (parse-char "x")) " x"))) + (assert (ok? r)) + (assert= "x" (result-value r)))) + (deftest + "skip-right discards second" + (let + ((r (run-parser (skip-right (parse-char "x") (parse-char " ")) "x "))) + (assert (ok? r)) + (assert= "x" (result-value r)))) + (deftest + "not-followed-by succeeds on failure" + (let + ((r (run-parser (not-followed-by (parse-char "x")) "abc"))) + (assert (ok? r)) + (assert= 0 (result-pos r)))) + (deftest + "not-followed-by fails on success" + (assert + (not (ok? (run-parser (not-followed-by (parse-char "a")) "abc"))))) + (deftest + "look-ahead preserves position" + (let + ((r (run-parser (look-ahead (parse-char "a")) "abc"))) + (assert (ok? r)) + (assert= "a" (result-value r)) + (assert= 0 (result-pos r))))) + +(defsuite + "pc-char-classes" + (deftest "digit matches" (assert (ok? (run-parser digit "5")))) + (deftest + "digit fails on letter" + (assert (not (ok? (run-parser digit "a"))))) + (deftest "letter matches" (assert (ok? (run-parser letter "a")))) + (deftest + "letter fails on digit" + (assert (not (ok? (run-parser letter "5"))))) + (deftest + "alpha-num matches digit" + (assert (ok? (run-parser alpha-num "5")))) + (deftest + "alpha-num matches letter" + (assert (ok? (run-parser alpha-num "z")))) + (deftest + "skip-spaces skips" + (let + ((r (run-parser skip-spaces " \t\nabc"))) + (assert (ok? r)) + (assert= 4 (result-pos r)))) + (deftest + "skip-spaces on no spaces" + (let + ((r (run-parser skip-spaces "abc"))) + (assert (ok? r)) + (assert= 0 (result-pos r))))) + +(defsuite + "pc-literals" + (deftest + "number integer" + (let + ((r (run-parser number-literal "42abc"))) + (assert (ok? r)) + (assert= 42 (result-value r)) + (assert= 2 (result-pos r)))) + (deftest + "number decimal" + (let + ((r (run-parser number-literal "3.14rest"))) + (assert (ok? r)) + (assert= 3.14 (result-value r)))) + (deftest + "number negative" + (let + ((r (run-parser number-literal "-7x"))) + (assert (ok? r)) + (assert= -7 (result-value r)))) + (deftest + "number fails on non-digit" + (assert (not (ok? (run-parser number-literal "abc"))))) + (deftest + "string-literal basic" + (let + ((r (run-parser string-literal "\"hello\" rest"))) + (assert (ok? r)) + (assert= "hello" (result-value r)))) + (deftest + "string-literal with escapes" + (let + ((r (run-parser string-literal "\"a\\nb\""))) + (assert (ok? r)) + (assert= "a\nb" (result-value r)))) + (deftest + "string-literal fails without quote" + (assert (not (ok? (run-parser string-literal "hello"))))) + (deftest + "identifier basic" + (let + ((r (run-parser identifier "hello-world rest"))) + (assert (ok? r)) + (assert= "hello-world" (result-value r)))) + (deftest + "identifier with special chars" + (let + ((r (run-parser identifier "empty? x"))) + (assert (ok? r)) + (assert= "empty?" (result-value r))))) + +(defsuite + "pc-sx-tokenizer" + (deftest + "tokenize number" + (let + ((tokens (result-value (run-parser sx-tokenize "42")))) + (assert= 1 (len tokens)) + (assert= "number" (get (first tokens) "type")) + (assert= 42 (get (first tokens) "value")))) + (deftest + "tokenize string" + (let + ((tokens (result-value (run-parser sx-tokenize "\"hi\"")))) + (assert= 1 (len tokens)) + (assert= "string" (get (first tokens) "type")) + (assert= "hi" (get (first tokens) "value")))) + (deftest + "tokenize keyword" + (let + ((tokens (result-value (run-parser sx-tokenize ":ok")))) + (assert= 1 (len tokens)) + (assert= "keyword" (get (first tokens) "type")) + (assert= ":ok" (get (first tokens) "value")))) + (deftest + "tokenize symbol" + (let + ((tokens (result-value (run-parser sx-tokenize "define")))) + (assert= 1 (len tokens)) + (assert= "symbol" (get (first tokens) "type")) + (assert= "define" (get (first tokens) "value")))) + (deftest + "tokenize simple expression" + (let + ((tokens (result-value (run-parser sx-tokenize "(+ 1 2)")))) + (assert= 5 (len tokens)) + (assert= "open-paren" (get (first tokens) "type")) + (assert= "symbol" (get (nth tokens 1) "type")) + (assert= "+" (get (nth tokens 1) "value")) + (assert= "number" (get (nth tokens 2) "type")) + (assert= "close-paren" (get (nth tokens 4) "type")))) + (deftest + "tokenize define expression" + (let + ((tokens (result-value (run-parser sx-tokenize "(define x 42)")))) + (assert= 5 (len tokens)) + (assert= "symbol" (get (nth tokens 1) "type")) + (assert= "define" (get (nth tokens 1) "value")) + (assert= "number" (get (nth tokens 3) "type")) + (assert= 42 (get (nth tokens 3) "value")))) + (deftest + "tokenize dict literal" + (let + ((tokens (result-value (run-parser sx-tokenize "{:ok true :value 42}")))) + (assert= 6 (len tokens)) + (assert= "open-brace" (get (first tokens) "type")) + (assert= "keyword" (get (nth tokens 1) "type")) + (assert= "close-brace" (get (nth tokens 5) "type")))) + (deftest + "tokenize with comment" + (let + ((tokens (result-value (run-parser sx-tokenize ";; comment\n42")))) + (assert= 1 (len tokens)) + (assert= "number" (get (first tokens) "type")))) + (deftest + "tokenize nested expression" + (let + ((tokens (result-value (run-parser sx-tokenize "(+ 1 (* 2 3))")))) + (assert= 9 (len tokens)) + (assert= "open-paren" (get (first tokens) "type")) + (assert= "open-paren" (get (nth tokens 3) "type")) + (assert= "close-paren" (get (nth tokens 7) "type")) + (assert= "close-paren" (get (nth tokens 8) "type"))))) + +(defsuite + "pc-recursive" + (deftest + "lazy-parser enables recursion" + (define + nested-a + (lazy-parser + (fn + () + (alt + (list + (parse-char "a") + (fmap + (fn (parts) (nth parts 1)) + (seq (list (parse-char "(") nested-a (parse-char ")"))))))))) + (assert (ok? (run-parser nested-a "a"))) + (assert= "a" (result-value (run-parser nested-a "a"))) + (assert= "a" (result-value (run-parser nested-a "(a)"))) + (assert= "a" (result-value (run-parser nested-a "((a))")))) + (deftest + "matched parens depth" + (define + paren-depth + (lazy-parser + (fn + () + (alt + (list + (fmap + (fn (parts) (+ 1 (nth parts 1))) + (seq (list (parse-char "(") paren-depth (parse-char ")")))) + (fn (input pos) (make-ok 0 pos))))))) + (assert= 0 (result-value (run-parser paren-depth ""))) + (assert= 1 (result-value (run-parser paren-depth "()"))) + (assert= 3 (result-value (run-parser paren-depth "((()))"))))) \ No newline at end of file