Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 21s
lib/scheme/parser.sx — reader for R7RS-small lexical syntax:
- numbers (int/float/exp)
- booleans #t / #f / #true / #false
- strings with standard escapes
- symbols (permissive — any non-delimiter)
- characters #\c, #\space, #\newline, #\tab, etc.
- vectors #(...)
- proper lists (dotted-pair deferred to Phase 3 with lambda rest-args)
- reader macros: 'X `X ,X ,@X → (quote X) (quasiquote X) etc.
(Scheme conventions — lowercase, no $ prefix)
- line comments ;
- nestable block comments #| ... |#
- datum comments #;<datum>
AST shape mirrors Kernel: numbers/booleans/lists pass through;
strings wrapped as {:scm-string ...} to distinguish from symbols
(bare SX strings); chars as {:scm-char ...}; vectors as
{:scm-vector (list ...)}.
62 tests in lib/scheme/tests/parse.sx cover atom kinds, escape
sequences, quote/quasiquote/unquote/unquote-splicing, all three
comment flavours, and classic Scheme idioms (lambda, define, let,
if-cond).
Note: SX cond branches evaluate only the LAST expression, so
multi-mutation branches need explicit (do ...) or (begin ...)
wrappers — caught during block-comment debugging.
chisel: consumes-lex (lex-digit?, lex-whitespace? from
lib/guest/lex.sx); pratt not consumed (no operator precedence
in Scheme).
364 lines
11 KiB
Plaintext
364 lines
11 KiB
Plaintext
;; lib/scheme/parser.sx — R7RS-small lexical reader.
|
||
;;
|
||
;; Reads numbers, booleans, characters, strings, symbols, vectors,
|
||
;; proper lists, and the standard reader macros (' ` , ,@). Line
|
||
;; comments `;`, datum comments `#;`, and nestable block comments
|
||
;; `#| ... |#` are all stripped.
|
||
;;
|
||
;; Dotted-pair syntax `(a b . c)` is deferred to Phase 3 — it's only
|
||
;; needed once lambdas with rest args land.
|
||
;;
|
||
;; AST shape
|
||
;; ---------
|
||
;; number → SX number
|
||
;; #t / #true → SX true ; #f / #false → SX false
|
||
;; () → SX empty list
|
||
;; "..." → {:scm-string "..."} wrapped so symbols stay bare
|
||
;; #\c → {:scm-char "c"} single-char (or named: space/newline/etc.)
|
||
;; #(1 2 3) → {:scm-vector (list 1 2 3)}
|
||
;; foo → "foo" bare SX string for symbols
|
||
;; (a b c) → SX list of parsed forms
|
||
;; 'X → (list "quote" X)
|
||
;; `X → (list "quasiquote" X)
|
||
;; ,X → (list "unquote" X)
|
||
;; ,@X → (list "unquote-splicing" X)
|
||
;;
|
||
;; Public API
|
||
;; (scheme-parse SRC) — first form; errors on extra trailing input
|
||
;; (scheme-parse-all SRC) — all top-level forms, as SX list
|
||
;; (scheme-string? V) — recognise wrapped string literal
|
||
;; (scheme-string-value V) — extract the underlying string
|
||
;; (scheme-char? V) / (scheme-char-value V)
|
||
;; (scheme-vector? V) / (scheme-vector-elements V)
|
||
;;
|
||
;; Consumes: lib/guest/lex.sx (lex-digit?, lex-whitespace?)
|
||
|
||
(define scheme-string-make (fn (s) {:scm-string s}))
|
||
(define
|
||
scheme-string?
|
||
(fn (v) (and (dict? v) (string? (get v :scm-string)))))
|
||
(define scheme-string-value (fn (v) (get v :scm-string)))
|
||
|
||
(define scheme-char-make (fn (s) {:scm-char s}))
|
||
(define
|
||
scheme-char?
|
||
(fn (v) (and (dict? v) (string? (get v :scm-char)))))
|
||
(define scheme-char-value (fn (v) (get v :scm-char)))
|
||
|
||
(define scheme-vector-make (fn (xs) {:scm-vector xs}))
|
||
(define
|
||
scheme-vector?
|
||
(fn (v) (and (dict? v) (list? (get v :scm-vector)))))
|
||
(define scheme-vector-elements (fn (v) (get v :scm-vector)))
|
||
|
||
;; Atom delimiters: characters that end a symbol or numeric token.
|
||
(define
|
||
scm-delim?
|
||
(fn
|
||
(c)
|
||
(or
|
||
(nil? c)
|
||
(lex-whitespace? c)
|
||
(= c "(")
|
||
(= c ")")
|
||
(= c "\"")
|
||
(= c ";")
|
||
(= c "'")
|
||
(= c "`")
|
||
(= c ",")
|
||
(= c "|"))))
|
||
|
||
;; Numeric grammar: [+-]? (digit+ ('.' digit+)? | '.' digit+) ([eE][+-]?digit+)?
|
||
(define
|
||
scm-numeric?
|
||
(fn
|
||
(s)
|
||
(let
|
||
((n (string-length s)))
|
||
(cond
|
||
((= n 0) false)
|
||
(:else
|
||
(let
|
||
((c0 (substring s 0 1)))
|
||
(let
|
||
((start (if (or (= c0 "+") (= c0 "-")) 1 0)))
|
||
(scm-num-body? s start n))))))))
|
||
|
||
(define
|
||
scm-num-body?
|
||
(fn
|
||
(s start n)
|
||
(cond
|
||
((>= start n) false)
|
||
((= (substring s start (+ start 1)) ".")
|
||
(scm-num-need-digits? s (+ start 1) n false))
|
||
((lex-digit? (substring s start (+ start 1)))
|
||
(scm-num-int-tail? s (+ start 1) n))
|
||
(:else false))))
|
||
|
||
(define
|
||
scm-num-int-tail?
|
||
(fn
|
||
(s i n)
|
||
(cond
|
||
((>= i n) true)
|
||
((lex-digit? (substring s i (+ i 1)))
|
||
(scm-num-int-tail? s (+ i 1) n))
|
||
((= (substring s i (+ i 1)) ".")
|
||
(scm-num-need-digits? s (+ i 1) n true))
|
||
((or (= (substring s i (+ i 1)) "e") (= (substring s i (+ i 1)) "E"))
|
||
(scm-num-exp-sign? s (+ i 1) n))
|
||
(:else false))))
|
||
|
||
(define
|
||
scm-num-need-digits?
|
||
(fn
|
||
(s i n had-int)
|
||
(cond
|
||
((>= i n) had-int)
|
||
((lex-digit? (substring s i (+ i 1)))
|
||
(scm-num-frac-tail? s (+ i 1) n))
|
||
(:else false))))
|
||
|
||
(define
|
||
scm-num-frac-tail?
|
||
(fn
|
||
(s i n)
|
||
(cond
|
||
((>= i n) true)
|
||
((lex-digit? (substring s i (+ i 1)))
|
||
(scm-num-frac-tail? s (+ i 1) n))
|
||
((or (= (substring s i (+ i 1)) "e") (= (substring s i (+ i 1)) "E"))
|
||
(scm-num-exp-sign? s (+ i 1) n))
|
||
(:else false))))
|
||
|
||
(define
|
||
scm-num-exp-sign?
|
||
(fn
|
||
(s i n)
|
||
(cond
|
||
((>= i n) false)
|
||
((or (= (substring s i (+ i 1)) "+") (= (substring s i (+ i 1)) "-"))
|
||
(scm-num-exp-digits? s (+ i 1) n false))
|
||
(:else (scm-num-exp-digits? s i n false)))))
|
||
|
||
(define
|
||
scm-num-exp-digits?
|
||
(fn
|
||
(s i n had)
|
||
(cond
|
||
((>= i n) had)
|
||
((lex-digit? (substring s i (+ i 1)))
|
||
(scm-num-exp-digits? s (+ i 1) n true))
|
||
(:else false))))
|
||
|
||
;; Named character literals: #\space, #\newline, etc.
|
||
(define
|
||
scm-named-char
|
||
(fn
|
||
(name)
|
||
(cond
|
||
((= name "space") " ")
|
||
((= name "newline") "\n")
|
||
((= name "tab") "\t")
|
||
((= name "return") "\r")
|
||
((= name "null") " |