;; Parser Combinator Library — Pure SX ;; ;; A parser is (fn (input pos) result) where: ;; Success: {:ok true :value val :pos new-pos} ;; Failure: {:ok false :expected desc :pos pos} ;; ;; Combinators compose parsers: seq, alt, many, sep-by, between, etc. ;; Recursive grammars use (lazy-parser (fn () ...)) thunks. ;; ── Layer 0: Result constructors ────────────────────────────────── (define make-ok (fn (val pos) {:pos pos :ok true :value val})) (define make-fail (fn (expected pos) {:pos pos :ok false :expected expected})) (define ok? (fn (r) (get r "ok"))) (define result-value (fn (r) (get r "value"))) (define result-pos (fn (r) (get r "pos"))) (define result-expected (fn (r) (get r "expected"))) ;; ── Layer 1: Primitive parsers ──────────────────────────────────── (define satisfy (fn (pred desc) (fn (input pos) (if (< pos (len input)) (let ((ch (nth input pos))) (if (pred ch) (make-ok ch (+ pos 1)) (make-fail desc pos))) (make-fail desc pos))))) (define parse-char (fn (ch) (satisfy (fn (c) (= c ch)) (str "'" ch "'")))) (define any-char (satisfy (fn (c) true) "any character")) (define parse-string (fn (target) (let ((target-len (len target))) (fn (input pos) (if (<= (+ pos target-len) (len input)) (if (= (slice input pos (+ pos target-len)) target) (make-ok target (+ pos target-len)) (make-fail (str "\"" target "\"") pos)) (make-fail (str "\"" target "\"") pos)))))) (define eof (fn (input pos) (if (>= pos (len input)) (make-ok nil pos) (make-fail "end of input" pos)))) ;; ── Layer 2: Core combinators ───────────────────────────────────── (define fmap (fn (f parser) (fn (input pos) (let ((r (parser input pos))) (if (ok? r) (make-ok (f (result-value r)) (result-pos r)) r))))) (define parse-bind (fn (parser f) (fn (input pos) (let ((r (parser input pos))) (if (ok? r) ((f (result-value r)) input (result-pos r)) r))))) (define seq (fn (parsers) (fn (input pos) (let ((results (list)) (cur-pos pos) (failed nil)) (for-each (fn (p) (when (not failed) (let ((r (p input cur-pos))) (if (ok? r) (do (append! results (result-value r)) (set! cur-pos (result-pos r))) (set! failed r))))) parsers) (if failed failed (make-ok results cur-pos)))))) (define seq2 (fn (p1 p2) (seq (list p1 p2)))) (define alt (fn (parsers) (fn (input pos) (let ((best-fail nil)) (define loop (fn (ps) (if (empty? ps) (or best-fail (make-fail "no alternatives" pos)) (let ((r ((first ps) input pos))) (if (ok? r) r (do (when (or (not best-fail) (> (result-pos r) (result-pos best-fail))) (set! best-fail r)) (loop (rest ps)))))))) (loop parsers))))) (define alt2 (fn (p1 p2) (alt (list p1 p2)))) (define label (fn (name parser) (fn (input pos) (let ((r (parser input pos))) (if (ok? r) r (make-fail name pos)))))) (define lazy-parser (fn (thunk) (fn (input pos) ((thunk) input pos)))) ;; ── Layer 3: Repetition combinators ─────────────────────────────── (define many (fn (parser) (fn (input pos) (let ((results (list)) (cur-pos pos)) (define loop (fn () (let ((r (parser input cur-pos))) (if (ok? r) (do (append! results (result-value r)) (set! cur-pos (result-pos r)) (loop)) (make-ok results cur-pos))))) (loop))))) (define many1 (fn (parser) (fn (input pos) (let ((r (parser input pos))) (if (ok? r) (let ((rest-r ((many parser) input (result-pos r)))) (make-ok (cons (result-value r) (result-value rest-r)) (result-pos rest-r))) r))))) (define optional (fn (parser) (fn (input pos) (let ((r (parser input pos))) (if (ok? r) r (make-ok nil pos)))))) (define skip-many (fn (parser) (fn (input pos) (let ((cur-pos pos)) (define loop (fn () (let ((r (parser input cur-pos))) (if (ok? r) (do (set! cur-pos (result-pos r)) (loop)) (make-ok nil cur-pos))))) (loop))))) ;; ── Layer 4: Structural combinators ─────────────────────────────── (define between (fn (open close body) (fmap (fn (results) (nth results 1)) (seq (list open body close))))) (define sep-by1 (fn (parser sep) (fn (input pos) (let ((first-r (parser input pos))) (if (ok? first-r) (let ((rest-r ((many (fmap (fn (pair) (nth pair 1)) (seq (list sep parser)))) input (result-pos first-r)))) (make-ok (cons (result-value first-r) (result-value rest-r)) (result-pos rest-r))) first-r))))) (define sep-by (fn (parser sep) (alt (list (sep-by1 parser sep) (fn (input pos) (make-ok (list) pos)))))) (define skip-left (fn (skip keep) (fmap (fn (results) (nth results 1)) (seq (list skip keep))))) (define skip-right (fn (keep skip) (fmap (fn (results) (first results)) (seq (list keep skip))))) (define not-followed-by (fn (parser) (fn (input pos) (let ((r (parser input pos))) (if (ok? r) (make-fail "not followed by" pos) (make-ok nil pos)))))) (define look-ahead (fn (parser) (fn (input pos) (let ((r (parser input pos))) (if (ok? r) (make-ok (result-value r) pos) r))))) ;; ── Layer 5: Character class parsers ────────────────────────────── (define digit (satisfy (fn (c) (and (>= c "0") (<= c "9"))) "digit")) (define letter (satisfy (fn (c) (or (and (>= c "a") (<= c "z")) (and (>= c "A") (<= c "Z")))) "letter")) (define alpha-num (alt2 letter digit)) (define whitespace-char (satisfy (fn (c) (or (= c " ") (= c "\t") (= c "\n") (= c "\r"))) "whitespace")) (define skip-spaces (skip-many whitespace-char)) ;; ── Layer 6: Literal parsers ────────────────────────────────────── (define number-literal (fn (input pos) (let ((start pos) (cur pos) (input-len (len input))) (when (and (< cur input-len) (= (nth input cur) "-")) (set! cur (+ cur 1))) (let ((digit-start cur)) (define scan-digits (fn () (when (and (< cur input-len) (>= (nth input cur) "0") (<= (nth input cur) "9")) (set! cur (+ cur 1)) (scan-digits)))) (scan-digits) (if (= cur digit-start) (make-fail "number" pos) (do (when (and (< cur input-len) (= (nth input cur) ".")) (set! cur (+ cur 1)) (scan-digits)) (make-ok (parse-float (slice input start cur)) cur))))))) (define string-literal (fn (input pos) (if (and (< pos (len input)) (= (nth input pos) "\"")) (let ((cur (+ pos 1)) (input-len (len input)) (chars (list))) (define loop (fn () (if (>= cur input-len) (make-fail "closing quote" cur) (let ((ch (nth input cur))) (cond (= ch "\"") (make-ok (join "" chars) (+ cur 1)) (= ch "\\") (if (>= (+ cur 1) input-len) (make-fail "escape character" cur) (let ((next (nth input (+ cur 1)))) (cond (= next "n") (do (append! chars "\n") (set! cur (+ cur 2)) (loop)) (= next "t") (do (append! chars "\t") (set! cur (+ cur 2)) (loop)) (= next "\\") (do (append! chars "\\") (set! cur (+ cur 2)) (loop)) (= next "\"") (do (append! chars "\"") (set! cur (+ cur 2)) (loop)) :else (do (append! chars next) (set! cur (+ cur 2)) (loop))))) :else (do (append! chars ch) (set! cur (+ cur 1)) (loop))))))) (loop)) (make-fail "string" pos)))) (define identifier (let ((id-start (satisfy (fn (c) (or (and (>= c "a") (<= c "z")) (and (>= c "A") (<= c "Z")) (= c "_") (= c "~"))) "identifier start")) (id-char (satisfy (fn (c) (or (and (>= c "a") (<= c "z")) (and (>= c "A") (<= c "Z")) (and (>= c "0") (<= c "9")) (= c "-") (= c "_") (= c "?") (= c "!") (= c "~") (= c "/"))) "identifier char"))) (fmap (fn (pair) (str (first pair) (join "" (nth pair 1)))) (seq (list id-start (many id-char)))))) ;; ── Layer 7: Run parser ─────────────────────────────────────────── (define run-parser (fn (parser input) (parser input 0))) ;; ── Layer 8: SX tokenizer ───────────────────────────────────────── (define sx-comment (fn (input pos) (if (and (< pos (len input)) (= (nth input pos) ";")) (let ((cur (+ pos 1)) (input-len (len input))) (define loop (fn () (if (or (>= cur input-len) (= (nth input cur) "\n")) (make-ok {:type "comment"} cur) (do (set! cur (+ cur 1)) (loop))))) (loop)) (make-fail "comment" pos)))) (define sx-keyword (parse-bind (parse-char ":") (fn (colon) (fmap (fn (name) {:value (str ":" name) :type "keyword"}) identifier)))) (define sx-symbol (let ((sym-char (satisfy (fn (c) (or (and (>= c "a") (<= c "z")) (and (>= c "A") (<= c "Z")) (and (>= c "0") (<= c "9")) (= c "-") (= c "_") (= c "?") (= c "!") (= c "+") (= c "*") (= c "/") (= c "<") (= c ">") (= c "=") (= c "~") (= c "&") (= c ".") (= c "#"))) "symbol char"))) (fmap (fn (chars) {:value (join "" chars) :type "symbol"}) (many1 sym-char)))) (define sx-number (fmap (fn (n) {:value n :type "number"}) number-literal)) (define sx-string (fmap (fn (s) {:value s :type "string"}) string-literal)) (define sx-token (alt (list sx-comment sx-number sx-string sx-keyword (fmap (fn (c) {:type "open-paren"}) (parse-char "(")) (fmap (fn (c) {:type "close-paren"}) (parse-char ")")) (fmap (fn (c) {:type "open-brace"}) (parse-char "{")) (fmap (fn (c) {:type "close-brace"}) (parse-char "}")) (fmap (fn (c) {:type "quote"}) (parse-char "'")) sx-symbol))) (define sx-tokenize (fmap (fn (tokens) (filter (fn (t) (not (= (get t "type") "comment"))) tokens)) (skip-left skip-spaces (many (skip-right sx-token skip-spaces)))))