Files
rose-ash/lib/parser-combinators.sx
giles 516f9c7186 Step 12: Parser combinator library — pure SX, 68 tests
lib/parser-combinators.sx — 46 definitions in 8 layers:
  0. Result constructors (make-ok, make-fail, ok?, result-value, etc.)
  1. Primitives (satisfy, parse-char, any-char, parse-string, eof)
  2. Core combinators (fmap, parse-bind, seq, alt, label, lazy-parser)
  3. Repetition (many, many1, optional, skip-many)
  4. Structural (between, sep-by, sep-by1, skip-left, skip-right,
     not-followed-by, look-ahead)
  5. Character classes (digit, letter, alpha-num, whitespace, skip-spaces)
  6. Literal parsers (number-literal, string-literal, identifier)
  7. Run function (run-parser)
  8. SX tokenizer (sx-comment, sx-keyword, sx-symbol, sx-number,
     sx-string, sx-token, sx-tokenize)

Self-tests by tokenizing SX: (define x 42), {:ok true}, (+ 1 (* 2 3)),
comments, negative numbers, nested parens, recursive grammars.

No evaluator changes. Pure HO functions + thunks for lazy recursion.
2868/2868 tests, zero failures.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-05 20:14:51 +00:00

485 lines
13 KiB
Plaintext

;; Parser Combinator Library — Pure SX
;;
;; A parser is (fn (input pos) result) where:
;; Success: {:ok true :value val :pos new-pos}
;; Failure: {:ok false :expected desc :pos pos}
;;
;; Combinators compose parsers: seq, alt, many, sep-by, between, etc.
;; Recursive grammars use (lazy-parser (fn () ...)) thunks.
;; ── Layer 0: Result constructors ──────────────────────────────────
(define make-ok (fn (val pos) {:pos pos :ok true :value val}))
(define make-fail (fn (expected pos) {:pos pos :ok false :expected expected}))
(define ok? (fn (r) (get r "ok")))
(define result-value (fn (r) (get r "value")))
(define result-pos (fn (r) (get r "pos")))
(define result-expected (fn (r) (get r "expected")))
;; ── Layer 1: Primitive parsers ────────────────────────────────────
(define
satisfy
(fn
(pred desc)
(fn
(input pos)
(if
(< pos (len input))
(let
((ch (nth input pos)))
(if (pred ch) (make-ok ch (+ pos 1)) (make-fail desc pos)))
(make-fail desc pos)))))
(define
parse-char
(fn (ch) (satisfy (fn (c) (= c ch)) (str "'" ch "'"))))
(define any-char (satisfy (fn (c) true) "any character"))
(define
parse-string
(fn
(target)
(let
((target-len (len target)))
(fn
(input pos)
(if
(<= (+ pos target-len) (len input))
(if
(= (slice input pos (+ pos target-len)) target)
(make-ok target (+ pos target-len))
(make-fail (str "\"" target "\"") pos))
(make-fail (str "\"" target "\"") pos))))))
(define
eof
(fn
(input pos)
(if
(>= pos (len input))
(make-ok nil pos)
(make-fail "end of input" pos))))
;; ── Layer 2: Core combinators ─────────────────────────────────────
(define
fmap
(fn
(f parser)
(fn
(input pos)
(let
((r (parser input pos)))
(if (ok? r) (make-ok (f (result-value r)) (result-pos r)) r)))))
(define
parse-bind
(fn
(parser f)
(fn
(input pos)
(let
((r (parser input pos)))
(if (ok? r) ((f (result-value r)) input (result-pos r)) r)))))
(define
seq
(fn
(parsers)
(fn
(input pos)
(let
((results (list)) (cur-pos pos) (failed nil))
(for-each
(fn
(p)
(when
(not failed)
(let
((r (p input cur-pos)))
(if
(ok? r)
(do
(append! results (result-value r))
(set! cur-pos (result-pos r)))
(set! failed r)))))
parsers)
(if failed failed (make-ok results cur-pos))))))
(define seq2 (fn (p1 p2) (seq (list p1 p2))))
(define
alt
(fn
(parsers)
(fn
(input pos)
(let
((best-fail nil))
(define
loop
(fn
(ps)
(if
(empty? ps)
(or best-fail (make-fail "no alternatives" pos))
(let
((r ((first ps) input pos)))
(if
(ok? r)
r
(do
(when
(or
(not best-fail)
(> (result-pos r) (result-pos best-fail)))
(set! best-fail r))
(loop (rest ps))))))))
(loop parsers)))))
(define alt2 (fn (p1 p2) (alt (list p1 p2))))
(define
label
(fn
(name parser)
(fn
(input pos)
(let ((r (parser input pos))) (if (ok? r) r (make-fail name pos))))))
(define lazy-parser (fn (thunk) (fn (input pos) ((thunk) input pos))))
;; ── Layer 3: Repetition combinators ───────────────────────────────
(define
many
(fn
(parser)
(fn
(input pos)
(let
((results (list)) (cur-pos pos))
(define
loop
(fn
()
(let
((r (parser input cur-pos)))
(if
(ok? r)
(do
(append! results (result-value r))
(set! cur-pos (result-pos r))
(loop))
(make-ok results cur-pos)))))
(loop)))))
(define
many1
(fn
(parser)
(fn
(input pos)
(let
((r (parser input pos)))
(if
(ok? r)
(let
((rest-r ((many parser) input (result-pos r))))
(make-ok
(cons (result-value r) (result-value rest-r))
(result-pos rest-r)))
r)))))
(define
optional
(fn
(parser)
(fn
(input pos)
(let ((r (parser input pos))) (if (ok? r) r (make-ok nil pos))))))
(define
skip-many
(fn
(parser)
(fn
(input pos)
(let
((cur-pos pos))
(define
loop
(fn
()
(let
((r (parser input cur-pos)))
(if
(ok? r)
(do (set! cur-pos (result-pos r)) (loop))
(make-ok nil cur-pos)))))
(loop)))))
;; ── Layer 4: Structural combinators ───────────────────────────────
(define
between
(fn
(open close body)
(fmap (fn (results) (nth results 1)) (seq (list open body close)))))
(define
sep-by1
(fn
(parser sep)
(fn
(input pos)
(let
((first-r (parser input pos)))
(if
(ok? first-r)
(let
((rest-r ((many (fmap (fn (pair) (nth pair 1)) (seq (list sep parser)))) input (result-pos first-r))))
(make-ok
(cons (result-value first-r) (result-value rest-r))
(result-pos rest-r)))
first-r)))))
(define
sep-by
(fn
(parser sep)
(alt
(list (sep-by1 parser sep) (fn (input pos) (make-ok (list) pos))))))
(define
skip-left
(fn
(skip keep)
(fmap (fn (results) (nth results 1)) (seq (list skip keep)))))
(define
skip-right
(fn
(keep skip)
(fmap (fn (results) (first results)) (seq (list keep skip)))))
(define
not-followed-by
(fn
(parser)
(fn
(input pos)
(let
((r (parser input pos)))
(if (ok? r) (make-fail "not followed by" pos) (make-ok nil pos))))))
(define
look-ahead
(fn
(parser)
(fn
(input pos)
(let
((r (parser input pos)))
(if (ok? r) (make-ok (result-value r) pos) r)))))
;; ── Layer 5: Character class parsers ──────────────────────────────
(define digit (satisfy (fn (c) (and (>= c "0") (<= c "9"))) "digit"))
(define
letter
(satisfy
(fn
(c)
(or (and (>= c "a") (<= c "z")) (and (>= c "A") (<= c "Z"))))
"letter"))
(define alpha-num (alt2 letter digit))
(define
whitespace-char
(satisfy
(fn (c) (or (= c " ") (= c "\t") (= c "\n") (= c "\r")))
"whitespace"))
(define skip-spaces (skip-many whitespace-char))
;; ── Layer 6: Literal parsers ──────────────────────────────────────
(define
number-literal
(fn
(input pos)
(let
((start pos) (cur pos) (input-len (len input)))
(when
(and (< cur input-len) (= (nth input cur) "-"))
(set! cur (+ cur 1)))
(let
((digit-start cur))
(define
scan-digits
(fn
()
(when
(and
(< cur input-len)
(>= (nth input cur) "0")
(<= (nth input cur) "9"))
(set! cur (+ cur 1))
(scan-digits))))
(scan-digits)
(if
(= cur digit-start)
(make-fail "number" pos)
(do
(when
(and (< cur input-len) (= (nth input cur) "."))
(set! cur (+ cur 1))
(scan-digits))
(make-ok (parse-float (slice input start cur)) cur)))))))
(define
string-literal
(fn
(input pos)
(if
(and (< pos (len input)) (= (nth input pos) "\""))
(let
((cur (+ pos 1)) (input-len (len input)) (chars (list)))
(define
loop
(fn
()
(if
(>= cur input-len)
(make-fail "closing quote" cur)
(let
((ch (nth input cur)))
(cond
(= ch "\"")
(make-ok (join "" chars) (+ cur 1))
(= ch "\\")
(if
(>= (+ cur 1) input-len)
(make-fail "escape character" cur)
(let
((next (nth input (+ cur 1))))
(cond
(= next "n")
(do
(append! chars "\n")
(set! cur (+ cur 2))
(loop))
(= next "t")
(do
(append! chars "\t")
(set! cur (+ cur 2))
(loop))
(= next "\\")
(do
(append! chars "\\")
(set! cur (+ cur 2))
(loop))
(= next "\"")
(do
(append! chars "\"")
(set! cur (+ cur 2))
(loop))
:else (do
(append! chars next)
(set! cur (+ cur 2))
(loop)))))
:else (do (append! chars ch) (set! cur (+ cur 1)) (loop)))))))
(loop))
(make-fail "string" pos))))
(define
identifier
(let
((id-start (satisfy (fn (c) (or (and (>= c "a") (<= c "z")) (and (>= c "A") (<= c "Z")) (= c "_") (= c "~"))) "identifier start"))
(id-char
(satisfy
(fn
(c)
(or
(and (>= c "a") (<= c "z"))
(and (>= c "A") (<= c "Z"))
(and (>= c "0") (<= c "9"))
(= c "-")
(= c "_")
(= c "?")
(= c "!")
(= c "~")
(= c "/")))
"identifier char")))
(fmap
(fn (pair) (str (first pair) (join "" (nth pair 1))))
(seq (list id-start (many id-char))))))
;; ── Layer 7: Run parser ───────────────────────────────────────────
(define run-parser (fn (parser input) (parser input 0)))
;; ── Layer 8: SX tokenizer ─────────────────────────────────────────
(define
sx-comment
(fn
(input pos)
(if
(and (< pos (len input)) (= (nth input pos) ";"))
(let
((cur (+ pos 1)) (input-len (len input)))
(define
loop
(fn
()
(if
(or (>= cur input-len) (= (nth input cur) "\n"))
(make-ok {:type "comment"} cur)
(do (set! cur (+ cur 1)) (loop)))))
(loop))
(make-fail "comment" pos))))
(define
sx-keyword
(parse-bind
(parse-char ":")
(fn (colon) (fmap (fn (name) {:value (str ":" name) :type "keyword"}) identifier))))
(define
sx-symbol
(let
((sym-char (satisfy (fn (c) (or (and (>= c "a") (<= c "z")) (and (>= c "A") (<= c "Z")) (and (>= c "0") (<= c "9")) (= c "-") (= c "_") (= c "?") (= c "!") (= c "+") (= c "*") (= c "/") (= c "<") (= c ">") (= c "=") (= c "~") (= c "&") (= c ".") (= c "#"))) "symbol char")))
(fmap (fn (chars) {:value (join "" chars) :type "symbol"}) (many1 sym-char))))
(define sx-number (fmap (fn (n) {:value n :type "number"}) number-literal))
(define sx-string (fmap (fn (s) {:value s :type "string"}) string-literal))
(define
sx-token
(alt
(list
sx-comment
sx-number
sx-string
sx-keyword
(fmap (fn (c) {:type "open-paren"}) (parse-char "("))
(fmap (fn (c) {:type "close-paren"}) (parse-char ")"))
(fmap (fn (c) {:type "open-brace"}) (parse-char "{"))
(fmap (fn (c) {:type "close-brace"}) (parse-char "}"))
(fmap (fn (c) {:type "quote"}) (parse-char "'"))
sx-symbol)))
(define
sx-tokenize
(fmap
(fn
(tokens)
(filter (fn (t) (not (= (get t "type") "comment"))) tokens))
(skip-left skip-spaces (many (skip-right sx-token skip-spaces)))))