Step 12: Parser combinator library — pure SX, 68 tests
lib/parser-combinators.sx — 46 definitions in 8 layers:
0. Result constructors (make-ok, make-fail, ok?, result-value, etc.)
1. Primitives (satisfy, parse-char, any-char, parse-string, eof)
2. Core combinators (fmap, parse-bind, seq, alt, label, lazy-parser)
3. Repetition (many, many1, optional, skip-many)
4. Structural (between, sep-by, sep-by1, skip-left, skip-right,
not-followed-by, look-ahead)
5. Character classes (digit, letter, alpha-num, whitespace, skip-spaces)
6. Literal parsers (number-literal, string-literal, identifier)
7. Run function (run-parser)
8. SX tokenizer (sx-comment, sx-keyword, sx-symbol, sx-number,
sx-string, sx-token, sx-tokenize)
Self-tests by tokenizing SX: (define x 42), {:ok true}, (+ 1 (* 2 3)),
comments, negative numbers, nested parens, recursive grammars.
No evaluator changes. Pure HO functions + thunks for lazy recursion.
2868/2868 tests, zero failures.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
485
lib/parser-combinators.sx
Normal file
485
lib/parser-combinators.sx
Normal file
@@ -0,0 +1,485 @@
|
||||
;; Parser Combinator Library — Pure SX
|
||||
;;
|
||||
;; A parser is (fn (input pos) result) where:
|
||||
;; Success: {:ok true :value val :pos new-pos}
|
||||
;; Failure: {:ok false :expected desc :pos pos}
|
||||
;;
|
||||
;; Combinators compose parsers: seq, alt, many, sep-by, between, etc.
|
||||
;; Recursive grammars use (lazy-parser (fn () ...)) thunks.
|
||||
|
||||
;; ── Layer 0: Result constructors ──────────────────────────────────
|
||||
|
||||
(define make-ok (fn (val pos) {:pos pos :ok true :value val}))
|
||||
(define make-fail (fn (expected pos) {:pos pos :ok false :expected expected}))
|
||||
(define ok? (fn (r) (get r "ok")))
|
||||
(define result-value (fn (r) (get r "value")))
|
||||
(define result-pos (fn (r) (get r "pos")))
|
||||
(define result-expected (fn (r) (get r "expected")))
|
||||
|
||||
;; ── Layer 1: Primitive parsers ────────────────────────────────────
|
||||
|
||||
(define
|
||||
satisfy
|
||||
(fn
|
||||
(pred desc)
|
||||
(fn
|
||||
(input pos)
|
||||
(if
|
||||
(< pos (len input))
|
||||
(let
|
||||
((ch (nth input pos)))
|
||||
(if (pred ch) (make-ok ch (+ pos 1)) (make-fail desc pos)))
|
||||
(make-fail desc pos)))))
|
||||
|
||||
(define
|
||||
parse-char
|
||||
(fn (ch) (satisfy (fn (c) (= c ch)) (str "'" ch "'"))))
|
||||
|
||||
(define any-char (satisfy (fn (c) true) "any character"))
|
||||
|
||||
(define
|
||||
parse-string
|
||||
(fn
|
||||
(target)
|
||||
(let
|
||||
((target-len (len target)))
|
||||
(fn
|
||||
(input pos)
|
||||
(if
|
||||
(<= (+ pos target-len) (len input))
|
||||
(if
|
||||
(= (slice input pos (+ pos target-len)) target)
|
||||
(make-ok target (+ pos target-len))
|
||||
(make-fail (str "\"" target "\"") pos))
|
||||
(make-fail (str "\"" target "\"") pos))))))
|
||||
|
||||
(define
|
||||
eof
|
||||
(fn
|
||||
(input pos)
|
||||
(if
|
||||
(>= pos (len input))
|
||||
(make-ok nil pos)
|
||||
(make-fail "end of input" pos))))
|
||||
|
||||
;; ── Layer 2: Core combinators ─────────────────────────────────────
|
||||
|
||||
(define
|
||||
fmap
|
||||
(fn
|
||||
(f parser)
|
||||
(fn
|
||||
(input pos)
|
||||
(let
|
||||
((r (parser input pos)))
|
||||
(if (ok? r) (make-ok (f (result-value r)) (result-pos r)) r)))))
|
||||
|
||||
(define
|
||||
parse-bind
|
||||
(fn
|
||||
(parser f)
|
||||
(fn
|
||||
(input pos)
|
||||
(let
|
||||
((r (parser input pos)))
|
||||
(if (ok? r) ((f (result-value r)) input (result-pos r)) r)))))
|
||||
|
||||
(define
|
||||
seq
|
||||
(fn
|
||||
(parsers)
|
||||
(fn
|
||||
(input pos)
|
||||
(let
|
||||
((results (list)) (cur-pos pos) (failed nil))
|
||||
(for-each
|
||||
(fn
|
||||
(p)
|
||||
(when
|
||||
(not failed)
|
||||
(let
|
||||
((r (p input cur-pos)))
|
||||
(if
|
||||
(ok? r)
|
||||
(do
|
||||
(append! results (result-value r))
|
||||
(set! cur-pos (result-pos r)))
|
||||
(set! failed r)))))
|
||||
parsers)
|
||||
(if failed failed (make-ok results cur-pos))))))
|
||||
|
||||
(define seq2 (fn (p1 p2) (seq (list p1 p2))))
|
||||
|
||||
(define
|
||||
alt
|
||||
(fn
|
||||
(parsers)
|
||||
(fn
|
||||
(input pos)
|
||||
(let
|
||||
((best-fail nil))
|
||||
(define
|
||||
loop
|
||||
(fn
|
||||
(ps)
|
||||
(if
|
||||
(empty? ps)
|
||||
(or best-fail (make-fail "no alternatives" pos))
|
||||
(let
|
||||
((r ((first ps) input pos)))
|
||||
(if
|
||||
(ok? r)
|
||||
r
|
||||
(do
|
||||
(when
|
||||
(or
|
||||
(not best-fail)
|
||||
(> (result-pos r) (result-pos best-fail)))
|
||||
(set! best-fail r))
|
||||
(loop (rest ps))))))))
|
||||
(loop parsers)))))
|
||||
|
||||
(define alt2 (fn (p1 p2) (alt (list p1 p2))))
|
||||
|
||||
(define
|
||||
label
|
||||
(fn
|
||||
(name parser)
|
||||
(fn
|
||||
(input pos)
|
||||
(let ((r (parser input pos))) (if (ok? r) r (make-fail name pos))))))
|
||||
|
||||
(define lazy-parser (fn (thunk) (fn (input pos) ((thunk) input pos))))
|
||||
|
||||
;; ── Layer 3: Repetition combinators ───────────────────────────────
|
||||
|
||||
(define
|
||||
many
|
||||
(fn
|
||||
(parser)
|
||||
(fn
|
||||
(input pos)
|
||||
(let
|
||||
((results (list)) (cur-pos pos))
|
||||
(define
|
||||
loop
|
||||
(fn
|
||||
()
|
||||
(let
|
||||
((r (parser input cur-pos)))
|
||||
(if
|
||||
(ok? r)
|
||||
(do
|
||||
(append! results (result-value r))
|
||||
(set! cur-pos (result-pos r))
|
||||
(loop))
|
||||
(make-ok results cur-pos)))))
|
||||
(loop)))))
|
||||
|
||||
(define
|
||||
many1
|
||||
(fn
|
||||
(parser)
|
||||
(fn
|
||||
(input pos)
|
||||
(let
|
||||
((r (parser input pos)))
|
||||
(if
|
||||
(ok? r)
|
||||
(let
|
||||
((rest-r ((many parser) input (result-pos r))))
|
||||
(make-ok
|
||||
(cons (result-value r) (result-value rest-r))
|
||||
(result-pos rest-r)))
|
||||
r)))))
|
||||
|
||||
(define
|
||||
optional
|
||||
(fn
|
||||
(parser)
|
||||
(fn
|
||||
(input pos)
|
||||
(let ((r (parser input pos))) (if (ok? r) r (make-ok nil pos))))))
|
||||
|
||||
(define
|
||||
skip-many
|
||||
(fn
|
||||
(parser)
|
||||
(fn
|
||||
(input pos)
|
||||
(let
|
||||
((cur-pos pos))
|
||||
(define
|
||||
loop
|
||||
(fn
|
||||
()
|
||||
(let
|
||||
((r (parser input cur-pos)))
|
||||
(if
|
||||
(ok? r)
|
||||
(do (set! cur-pos (result-pos r)) (loop))
|
||||
(make-ok nil cur-pos)))))
|
||||
(loop)))))
|
||||
|
||||
;; ── Layer 4: Structural combinators ───────────────────────────────
|
||||
|
||||
(define
|
||||
between
|
||||
(fn
|
||||
(open close body)
|
||||
(fmap (fn (results) (nth results 1)) (seq (list open body close)))))
|
||||
|
||||
(define
|
||||
sep-by1
|
||||
(fn
|
||||
(parser sep)
|
||||
(fn
|
||||
(input pos)
|
||||
(let
|
||||
((first-r (parser input pos)))
|
||||
(if
|
||||
(ok? first-r)
|
||||
(let
|
||||
((rest-r ((many (fmap (fn (pair) (nth pair 1)) (seq (list sep parser)))) input (result-pos first-r))))
|
||||
(make-ok
|
||||
(cons (result-value first-r) (result-value rest-r))
|
||||
(result-pos rest-r)))
|
||||
first-r)))))
|
||||
|
||||
(define
|
||||
sep-by
|
||||
(fn
|
||||
(parser sep)
|
||||
(alt
|
||||
(list (sep-by1 parser sep) (fn (input pos) (make-ok (list) pos))))))
|
||||
|
||||
(define
|
||||
skip-left
|
||||
(fn
|
||||
(skip keep)
|
||||
(fmap (fn (results) (nth results 1)) (seq (list skip keep)))))
|
||||
|
||||
(define
|
||||
skip-right
|
||||
(fn
|
||||
(keep skip)
|
||||
(fmap (fn (results) (first results)) (seq (list keep skip)))))
|
||||
|
||||
(define
|
||||
not-followed-by
|
||||
(fn
|
||||
(parser)
|
||||
(fn
|
||||
(input pos)
|
||||
(let
|
||||
((r (parser input pos)))
|
||||
(if (ok? r) (make-fail "not followed by" pos) (make-ok nil pos))))))
|
||||
|
||||
(define
|
||||
look-ahead
|
||||
(fn
|
||||
(parser)
|
||||
(fn
|
||||
(input pos)
|
||||
(let
|
||||
((r (parser input pos)))
|
||||
(if (ok? r) (make-ok (result-value r) pos) r)))))
|
||||
|
||||
;; ── Layer 5: Character class parsers ──────────────────────────────
|
||||
|
||||
(define digit (satisfy (fn (c) (and (>= c "0") (<= c "9"))) "digit"))
|
||||
|
||||
(define
|
||||
letter
|
||||
(satisfy
|
||||
(fn
|
||||
(c)
|
||||
(or (and (>= c "a") (<= c "z")) (and (>= c "A") (<= c "Z"))))
|
||||
"letter"))
|
||||
|
||||
(define alpha-num (alt2 letter digit))
|
||||
|
||||
(define
|
||||
whitespace-char
|
||||
(satisfy
|
||||
(fn (c) (or (= c " ") (= c "\t") (= c "\n") (= c "\r")))
|
||||
"whitespace"))
|
||||
|
||||
(define skip-spaces (skip-many whitespace-char))
|
||||
|
||||
;; ── Layer 6: Literal parsers ──────────────────────────────────────
|
||||
|
||||
(define
|
||||
number-literal
|
||||
(fn
|
||||
(input pos)
|
||||
(let
|
||||
((start pos) (cur pos) (input-len (len input)))
|
||||
(when
|
||||
(and (< cur input-len) (= (nth input cur) "-"))
|
||||
(set! cur (+ cur 1)))
|
||||
(let
|
||||
((digit-start cur))
|
||||
(define
|
||||
scan-digits
|
||||
(fn
|
||||
()
|
||||
(when
|
||||
(and
|
||||
(< cur input-len)
|
||||
(>= (nth input cur) "0")
|
||||
(<= (nth input cur) "9"))
|
||||
(set! cur (+ cur 1))
|
||||
(scan-digits))))
|
||||
(scan-digits)
|
||||
(if
|
||||
(= cur digit-start)
|
||||
(make-fail "number" pos)
|
||||
(do
|
||||
(when
|
||||
(and (< cur input-len) (= (nth input cur) "."))
|
||||
(set! cur (+ cur 1))
|
||||
(scan-digits))
|
||||
(make-ok (parse-float (slice input start cur)) cur)))))))
|
||||
|
||||
(define
|
||||
string-literal
|
||||
(fn
|
||||
(input pos)
|
||||
(if
|
||||
(and (< pos (len input)) (= (nth input pos) "\""))
|
||||
(let
|
||||
((cur (+ pos 1)) (input-len (len input)) (chars (list)))
|
||||
(define
|
||||
loop
|
||||
(fn
|
||||
()
|
||||
(if
|
||||
(>= cur input-len)
|
||||
(make-fail "closing quote" cur)
|
||||
(let
|
||||
((ch (nth input cur)))
|
||||
(cond
|
||||
(= ch "\"")
|
||||
(make-ok (join "" chars) (+ cur 1))
|
||||
(= ch "\\")
|
||||
(if
|
||||
(>= (+ cur 1) input-len)
|
||||
(make-fail "escape character" cur)
|
||||
(let
|
||||
((next (nth input (+ cur 1))))
|
||||
(cond
|
||||
(= next "n")
|
||||
(do
|
||||
(append! chars "\n")
|
||||
(set! cur (+ cur 2))
|
||||
(loop))
|
||||
(= next "t")
|
||||
(do
|
||||
(append! chars "\t")
|
||||
(set! cur (+ cur 2))
|
||||
(loop))
|
||||
(= next "\\")
|
||||
(do
|
||||
(append! chars "\\")
|
||||
(set! cur (+ cur 2))
|
||||
(loop))
|
||||
(= next "\"")
|
||||
(do
|
||||
(append! chars "\"")
|
||||
(set! cur (+ cur 2))
|
||||
(loop))
|
||||
:else (do
|
||||
(append! chars next)
|
||||
(set! cur (+ cur 2))
|
||||
(loop)))))
|
||||
:else (do (append! chars ch) (set! cur (+ cur 1)) (loop)))))))
|
||||
(loop))
|
||||
(make-fail "string" pos))))
|
||||
|
||||
(define
|
||||
identifier
|
||||
(let
|
||||
((id-start (satisfy (fn (c) (or (and (>= c "a") (<= c "z")) (and (>= c "A") (<= c "Z")) (= c "_") (= c "~"))) "identifier start"))
|
||||
(id-char
|
||||
(satisfy
|
||||
(fn
|
||||
(c)
|
||||
(or
|
||||
(and (>= c "a") (<= c "z"))
|
||||
(and (>= c "A") (<= c "Z"))
|
||||
(and (>= c "0") (<= c "9"))
|
||||
(= c "-")
|
||||
(= c "_")
|
||||
(= c "?")
|
||||
(= c "!")
|
||||
(= c "~")
|
||||
(= c "/")))
|
||||
"identifier char")))
|
||||
(fmap
|
||||
(fn (pair) (str (first pair) (join "" (nth pair 1))))
|
||||
(seq (list id-start (many id-char))))))
|
||||
|
||||
;; ── Layer 7: Run parser ───────────────────────────────────────────
|
||||
|
||||
(define run-parser (fn (parser input) (parser input 0)))
|
||||
|
||||
;; ── Layer 8: SX tokenizer ─────────────────────────────────────────
|
||||
|
||||
(define
|
||||
sx-comment
|
||||
(fn
|
||||
(input pos)
|
||||
(if
|
||||
(and (< pos (len input)) (= (nth input pos) ";"))
|
||||
(let
|
||||
((cur (+ pos 1)) (input-len (len input)))
|
||||
(define
|
||||
loop
|
||||
(fn
|
||||
()
|
||||
(if
|
||||
(or (>= cur input-len) (= (nth input cur) "\n"))
|
||||
(make-ok {:type "comment"} cur)
|
||||
(do (set! cur (+ cur 1)) (loop)))))
|
||||
(loop))
|
||||
(make-fail "comment" pos))))
|
||||
|
||||
(define
|
||||
sx-keyword
|
||||
(parse-bind
|
||||
(parse-char ":")
|
||||
(fn (colon) (fmap (fn (name) {:value (str ":" name) :type "keyword"}) identifier))))
|
||||
|
||||
(define
|
||||
sx-symbol
|
||||
(let
|
||||
((sym-char (satisfy (fn (c) (or (and (>= c "a") (<= c "z")) (and (>= c "A") (<= c "Z")) (and (>= c "0") (<= c "9")) (= c "-") (= c "_") (= c "?") (= c "!") (= c "+") (= c "*") (= c "/") (= c "<") (= c ">") (= c "=") (= c "~") (= c "&") (= c ".") (= c "#"))) "symbol char")))
|
||||
(fmap (fn (chars) {:value (join "" chars) :type "symbol"}) (many1 sym-char))))
|
||||
|
||||
(define sx-number (fmap (fn (n) {:value n :type "number"}) number-literal))
|
||||
|
||||
(define sx-string (fmap (fn (s) {:value s :type "string"}) string-literal))
|
||||
|
||||
(define
|
||||
sx-token
|
||||
(alt
|
||||
(list
|
||||
sx-comment
|
||||
sx-number
|
||||
sx-string
|
||||
sx-keyword
|
||||
(fmap (fn (c) {:type "open-paren"}) (parse-char "("))
|
||||
(fmap (fn (c) {:type "close-paren"}) (parse-char ")"))
|
||||
(fmap (fn (c) {:type "open-brace"}) (parse-char "{"))
|
||||
(fmap (fn (c) {:type "close-brace"}) (parse-char "}"))
|
||||
(fmap (fn (c) {:type "quote"}) (parse-char "'"))
|
||||
sx-symbol)))
|
||||
|
||||
(define
|
||||
sx-tokenize
|
||||
(fmap
|
||||
(fn
|
||||
(tokens)
|
||||
(filter (fn (t) (not (= (get t "type") "comment"))) tokens))
|
||||
(skip-left skip-spaces (many (skip-right sx-token skip-spaces)))))
|
||||
Reference in New Issue
Block a user