;; lib/scheme/parser.sx — R7RS-small lexical reader. ;; ;; Reads numbers, booleans, characters, strings, symbols, vectors, ;; proper lists, and the standard reader macros (' ` , ,@). Line ;; comments `;`, datum comments `#;`, and nestable block comments ;; `#| ... |#` are all stripped. ;; ;; Dotted-pair syntax `(a b . c)` is deferred to Phase 3 — it's only ;; needed once lambdas with rest args land. ;; ;; AST shape ;; --------- ;; number → SX number ;; #t / #true → SX true ; #f / #false → SX false ;; () → SX empty list ;; "..." → {:scm-string "..."} wrapped so symbols stay bare ;; #\c → {:scm-char "c"} single-char (or named: space/newline/etc.) ;; #(1 2 3) → {:scm-vector (list 1 2 3)} ;; foo → "foo" bare SX string for symbols ;; (a b c) → SX list of parsed forms ;; 'X → (list "quote" X) ;; `X → (list "quasiquote" X) ;; ,X → (list "unquote" X) ;; ,@X → (list "unquote-splicing" X) ;; ;; Public API ;; (scheme-parse SRC) — first form; errors on extra trailing input ;; (scheme-parse-all SRC) — all top-level forms, as SX list ;; (scheme-string? V) — recognise wrapped string literal ;; (scheme-string-value V) — extract the underlying string ;; (scheme-char? V) / (scheme-char-value V) ;; (scheme-vector? V) / (scheme-vector-elements V) ;; ;; Consumes: lib/guest/lex.sx (lex-digit?, lex-whitespace?) (define scheme-string-make (fn (s) {:scm-string s})) (define scheme-string? (fn (v) (and (dict? v) (string? (get v :scm-string))))) (define scheme-string-value (fn (v) (get v :scm-string))) (define scheme-char-make (fn (s) {:scm-char s})) (define scheme-char? (fn (v) (and (dict? v) (string? (get v :scm-char))))) (define scheme-char-value (fn (v) (get v :scm-char))) (define scheme-vector-make (fn (xs) {:scm-vector xs})) (define scheme-vector? (fn (v) (and (dict? v) (list? (get v :scm-vector))))) (define scheme-vector-elements (fn (v) (get v :scm-vector))) ;; Atom delimiters: characters that end a symbol or numeric token. (define scm-delim? (fn (c) (or (nil? c) (lex-whitespace? c) (= c "(") (= c ")") (= c "\"") (= c ";") (= c "'") (= c "`") (= c ",") (= c "|")))) ;; Numeric grammar: [+-]? (digit+ ('.' digit+)? | '.' digit+) ([eE][+-]?digit+)? (define scm-numeric? (fn (s) (let ((n (string-length s))) (cond ((= n 0) false) (:else (let ((c0 (substring s 0 1))) (let ((start (if (or (= c0 "+") (= c0 "-")) 1 0))) (scm-num-body? s start n)))))))) (define scm-num-body? (fn (s start n) (cond ((>= start n) false) ((= (substring s start (+ start 1)) ".") (scm-num-need-digits? s (+ start 1) n false)) ((lex-digit? (substring s start (+ start 1))) (scm-num-int-tail? s (+ start 1) n)) (:else false)))) (define scm-num-int-tail? (fn (s i n) (cond ((>= i n) true) ((lex-digit? (substring s i (+ i 1))) (scm-num-int-tail? s (+ i 1) n)) ((= (substring s i (+ i 1)) ".") (scm-num-need-digits? s (+ i 1) n true)) ((or (= (substring s i (+ i 1)) "e") (= (substring s i (+ i 1)) "E")) (scm-num-exp-sign? s (+ i 1) n)) (:else false)))) (define scm-num-need-digits? (fn (s i n had-int) (cond ((>= i n) had-int) ((lex-digit? (substring s i (+ i 1))) (scm-num-frac-tail? s (+ i 1) n)) (:else false)))) (define scm-num-frac-tail? (fn (s i n) (cond ((>= i n) true) ((lex-digit? (substring s i (+ i 1))) (scm-num-frac-tail? s (+ i 1) n)) ((or (= (substring s i (+ i 1)) "e") (= (substring s i (+ i 1)) "E")) (scm-num-exp-sign? s (+ i 1) n)) (:else false)))) (define scm-num-exp-sign? (fn (s i n) (cond ((>= i n) false) ((or (= (substring s i (+ i 1)) "+") (= (substring s i (+ i 1)) "-")) (scm-num-exp-digits? s (+ i 1) n false)) (:else (scm-num-exp-digits? s i n false))))) (define scm-num-exp-digits? (fn (s i n had) (cond ((>= i n) had) ((lex-digit? (substring s i (+ i 1))) (scm-num-exp-digits? s (+ i 1) n true)) (:else false)))) ;; Named character literals: #\space, #\newline, etc. (define scm-named-char (fn (name) (cond ((= name "space") " ") ((= name "newline") "\n") ((= name "tab") "\t") ((= name "return") "\r") ((= name "null") "") ((= name "nul") "") ((= name "delete") "") ((= name "rubout") "") ((= name "backspace") "") ((= name "escape") "") ((= name "alarm") "") (:else nil)))) ;; Reader: closure over (src, pos). (define scm-make-reader (fn (src) (let ((pos 0) (n (string-length src))) (define at (fn () (if (< pos n) (substring src pos (+ pos 1)) nil))) (define peek2 (fn () (if (< (+ pos 1) n) (substring src (+ pos 1) (+ pos 2)) nil))) (define adv (fn () (set! pos (+ pos 1)))) (define skip-line (fn () (when (and (at) (not (= (at) "\n"))) (adv) (skip-line)))) (define skip-block-comment (fn (depth) (cond ((nil? (at)) (error "scheme-parse: unterminated block comment")) ((and (= (at) "|") (= (peek2) "#")) (do (adv) (adv) (cond ((> depth 1) (skip-block-comment (- depth 1))) (:else nil)))) ((and (= (at) "#") (= (peek2) "|")) (do (adv) (adv) (skip-block-comment (+ depth 1)))) (:else (do (adv) (skip-block-comment depth)))))) (define skip-ws (fn () (cond ((nil? (at)) nil) ((lex-whitespace? (at)) (do (adv) (skip-ws))) ((= (at) ";") (do (adv) (skip-line) (skip-ws))) ((and (= (at) "#") (= (peek2) "|")) (do (adv) (adv) (skip-block-comment 1) (skip-ws))) (:else nil)))) (define read-string-body (fn (acc) (cond ((nil? (at)) (error "scheme-parse: unterminated string")) ((= (at) "\"") (do (adv) acc)) ((= (at) "\\") (do (adv) (let ((c (at))) (when (nil? c) (error "scheme-parse: trailing backslash")) (adv) (read-string-body (str acc (cond ((= c "n") "\n") ((= c "t") "\t") ((= c "r") "\r") ((= c "\"") "\"") ((= c "\\") "\\") ((= c "a") "") ((= c "b") "") (:else c))))))) (:else (let ((c (at))) (adv) (read-string-body (str acc c))))))) (define read-atom-body (fn (acc) (cond ((scm-delim? (at)) acc) (:else (let ((c (at))) (adv) (read-atom-body (str acc c))))))) (define classify-atom (fn (s) (cond ((scm-numeric? s) (string->number s)) (:else s)))) (define read-hash (fn () (let ((c (at))) (cond ((= c "t") (do (adv) (when (= (at) "r") (do (adv) (adv) (adv) (adv))) true)) ((= c "f") (do (adv) (when (= (at) "a") (do (adv) (adv) (adv) (adv) (adv))) false)) ((= c "\\") (do (adv) (read-char-lit))) ((= c "(") (do (adv) (scheme-vector-make (read-list (list))))) ((= c ";") (do (adv) (read-form) (read-form))) (:else (error (str "scheme-parse: unknown # form: #" c))))))) (define read-char-lit (fn () (let ((first-c (at))) (when (nil? first-c) (error "scheme-parse: end after #\\")) (adv) (cond ((scm-delim? (at)) (scheme-char-make first-c)) (:else (let ((name (str first-c (read-atom-body "")))) (let ((named (scm-named-char name))) (cond ((not (nil? named)) (scheme-char-make named)) ((= (string-length name) 1) (scheme-char-make name)) (:else (error (str "scheme-parse: unknown char #\\" name))))))))))) (define read-form (fn () (skip-ws) (cond ((nil? (at)) :scm-eof) ((= (at) ")") (error "scheme-parse: unexpected ')'")) ((= (at) "(") (do (adv) (read-list (list)))) ((= (at) "\"") (do (adv) (scheme-string-make (read-string-body "")))) ((= (at) "#") (do (adv) (read-hash))) ((= (at) "'") (do (adv) (list "quote" (read-form)))) ((= (at) "`") (do (adv) (list "quasiquote" (read-form)))) ((= (at) ",") (do (adv) (cond ((= (at) "@") (do (adv) (list "unquote-splicing" (read-form)))) (:else (list "unquote" (read-form)))))) (:else (classify-atom (read-atom-body "")))))) (define read-list (fn (acc) (skip-ws) (cond ((nil? (at)) (error "scheme-parse: unterminated list")) ((= (at) ")") (do (adv) acc)) (:else (read-list (append acc (list (read-form)))))))) (define read-all (fn (acc) (skip-ws) (if (nil? (at)) acc (read-all (append acc (list (read-form))))))) {:read-form read-form :read-all read-all}))) (define scheme-parse-all (fn (src) ((get (scm-make-reader src) :read-all) (list)))) (define scheme-parse (fn (src) (let ((r (scm-make-reader src))) (let ((form ((get r :read-form)))) (cond ((= form :scm-eof) (error "scheme-parse: empty input")) (:else (let ((next ((get r :read-form)))) (if (= next :scm-eof) form (error "scheme-parse: trailing input after first form")))))))))