scheme: Phase 1 parser — R7RS lexical reader + 62 tests [consumes-lex]
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 21s

lib/scheme/parser.sx — reader for R7RS-small lexical syntax:
- numbers (int/float/exp)
- booleans #t / #f / #true / #false
- strings with standard escapes
- symbols (permissive — any non-delimiter)
- characters #\c, #\space, #\newline, #\tab, etc.
- vectors #(...)
- proper lists (dotted-pair deferred to Phase 3 with lambda rest-args)
- reader macros: 'X `X ,X ,@X → (quote X) (quasiquote X) etc.
  (Scheme conventions — lowercase, no $ prefix)
- line comments ;
- nestable block comments #| ... |#
- datum comments #;<datum>

AST shape mirrors Kernel: numbers/booleans/lists pass through;
strings wrapped as {:scm-string ...} to distinguish from symbols
(bare SX strings); chars as {:scm-char ...}; vectors as
{:scm-vector (list ...)}.

62 tests in lib/scheme/tests/parse.sx cover atom kinds, escape
sequences, quote/quasiquote/unquote/unquote-splicing, all three
comment flavours, and classic Scheme idioms (lambda, define, let,
if-cond).

Note: SX cond branches evaluate only the LAST expression, so
multi-mutation branches need explicit (do ...) or (begin ...)
wrappers — caught during block-comment debugging.

chisel: consumes-lex (lex-digit?, lex-whitespace? from
lib/guest/lex.sx); pratt not consumed (no operator precedence
in Scheme).
This commit is contained in:
2026-05-13 19:58:30 +00:00
parent a75b4cbc57
commit c919d9a0d7
2 changed files with 177 additions and 0 deletions

BIN
lib/scheme/parser.sx Normal file

Binary file not shown.

177
lib/scheme/tests/parse.sx Normal file
View File

@@ -0,0 +1,177 @@
;; lib/scheme/tests/parse.sx — exercises lib/scheme/parser.sx.
(define scm-test-pass 0)
(define scm-test-fail 0)
(define scm-test-fails (list))
(define
scm-test
(fn
(name actual expected)
(if
(= actual expected)
(set! scm-test-pass (+ scm-test-pass 1))
(begin
(set! scm-test-fail (+ scm-test-fail 1))
(append! scm-test-fails {:name name :actual actual :expected expected})))))
;; ── numbers ───────────────────────────────────────────────────────
(scm-test "num: integer" (scheme-parse "42") 42)
(scm-test "num: zero" (scheme-parse "0") 0)
(scm-test "num: negative" (scheme-parse "-17") -17)
(scm-test "num: float" (scheme-parse "3.14") 3.14)
(scm-test "num: exponent" (scheme-parse "1e3") 1000)
(scm-test "num: negative float" (scheme-parse "-2.5") -2.5)
;; ── booleans ──────────────────────────────────────────────────────
(scm-test "bool: #t" (scheme-parse "#t") true)
(scm-test "bool: #true" (scheme-parse "#true") true)
(scm-test "bool: #f" (scheme-parse "#f") false)
(scm-test "bool: #false" (scheme-parse "#false") false)
;; ── strings ───────────────────────────────────────────────────────
(scm-test "str: empty" (scheme-string-value (scheme-parse "\"\"")) "")
(scm-test
"str: hello"
(scheme-string-value (scheme-parse "\"hello\""))
"hello")
(scm-test "str: predicate" (scheme-string? (scheme-parse "\"x\"")) true)
(scm-test "str: not symbol" (scheme-string? (scheme-parse "x")) false)
(scm-test
"str: escape newline"
(scheme-string-value (scheme-parse "\"a\\nb\""))
"a\nb")
(scm-test
"str: escape tab"
(scheme-string-value (scheme-parse "\"a\\tb\""))
"a\tb")
(scm-test
"str: escape quote"
(scheme-string-value (scheme-parse "\"a\\\"b\""))
"a\"b")
;; ── symbols ───────────────────────────────────────────────────────
(scm-test "sym: word" (scheme-parse "foo") "foo")
(scm-test "sym: hyphenated" (scheme-parse "set-car!") "set-car!")
(scm-test "sym: question mark" (scheme-parse "null?") "null?")
(scm-test "sym: arrow" (scheme-parse "->") "->")
(scm-test "sym: lt-eq" (scheme-parse "<=") "<=")
(scm-test "sym: bare plus" (scheme-parse "+") "+")
(scm-test "sym: bare minus" (scheme-parse "-") "-")
(scm-test "sym: dot-prefixed" (scheme-parse ".foo") ".foo")
;; ── characters ────────────────────────────────────────────────────
(scm-test "char: single" (scheme-char-value (scheme-parse "#\\a")) "a")
(scm-test "char: space" (scheme-char-value (scheme-parse "#\\space")) " ")
(scm-test "char: newline" (scheme-char-value (scheme-parse "#\\newline")) "\n")
(scm-test "char: tab" (scheme-char-value (scheme-parse "#\\tab")) "\t")
(scm-test "char: predicate" (scheme-char? (scheme-parse "#\\x")) true)
(scm-test "char: digit" (scheme-char-value (scheme-parse "#\\5")) "5")
;; ── vectors ───────────────────────────────────────────────────────
(scm-test "vec: empty" (scheme-vector-elements (scheme-parse "#()")) (list))
(scm-test
"vec: numbers"
(scheme-vector-elements (scheme-parse "#(1 2 3)"))
(list 1 2 3))
(scm-test "vec: predicate" (scheme-vector? (scheme-parse "#(1)")) true)
(scm-test "vec: not list" (scheme-vector? (scheme-parse "(1)")) false)
;; Nested vector: SX `=` doesn't deep-compare dicts-with-list-values
;; reliably under this CEK path, so check structure piecewise.
(scm-test "vec: nested first"
(first (scheme-vector-elements (scheme-parse "#(a #(b c) d)"))) "a")
(scm-test "vec: nested second is vector"
(scheme-vector?
(nth (scheme-vector-elements (scheme-parse "#(a #(b c) d)")) 1))
true)
(scm-test "vec: nested second elements"
(scheme-vector-elements
(nth (scheme-vector-elements (scheme-parse "#(a #(b c) d)")) 1))
(list "b" "c"))
;; ── lists ─────────────────────────────────────────────────────────
(scm-test "list: empty" (scheme-parse "()") (list))
(scm-test "list: flat" (scheme-parse "(a b c)") (list "a" "b" "c"))
(scm-test
"list: nested"
(scheme-parse "(a (b c) d)")
(list "a" (list "b" "c") "d"))
(scm-test
"list: mixed atoms"
(scheme-parse "(1 #t foo)")
(list 1 true "foo"))
;; ── reader macros ─────────────────────────────────────────────────
(scm-test "quote: 'foo" (scheme-parse "'foo") (list "quote" "foo"))
(scm-test
"quote: '(a b c)"
(scheme-parse "'(a b c)")
(list "quote" (list "a" "b" "c")))
(scm-test "quasiquote: `x" (scheme-parse "`x") (list "quasiquote" "x"))
(scm-test "unquote: ,x" (scheme-parse ",x") (list "unquote" "x"))
(scm-test
"unquote-splicing: ,@x"
(scheme-parse ",@x")
(list "unquote-splicing" "x"))
(scm-test
"qq mix"
(scheme-parse "`(a ,b ,@c)")
(list
"quasiquote"
(list "a" (list "unquote" "b") (list "unquote-splicing" "c"))))
;; ── comments ──────────────────────────────────────────────────────
(scm-test "comment: line" (scheme-parse "; nope\n42") 42)
(scm-test "comment: trailing" (scheme-parse "42 ; tail") 42)
(scm-test
"comment: inside list"
(scheme-parse "(a ; mid\n b)")
(list "a" "b"))
(scm-test "comment: block simple" (scheme-parse "#| skip |# 42") 42)
(scm-test
"comment: block nested"
(scheme-parse "#| outer #| inner |# done |# 42")
42)
(scm-test "comment: datum #;" (scheme-parse "#;skipme 42") 42)
(scm-test
"comment: datum skips list"
(scheme-parse "#;(1 2 3) 42")
42)
;; ── parse-all ─────────────────────────────────────────────────────
(scm-test "all: empty" (scheme-parse-all "") (list))
(scm-test
"all: three forms"
(scheme-parse-all "1 2 3")
(list 1 2 3))
(scm-test
"all: mixed"
(scheme-parse-all "(if #t 1 2) foo")
(list (list "if" true 1 2) "foo"))
;; ── classic Scheme idioms ─────────────────────────────────────────
(scm-test
"classic: lambda"
(scheme-parse "(lambda (x) (+ x 1))")
(list "lambda" (list "x") (list "+" "x" 1)))
(scm-test
"classic: define"
(scheme-parse "(define (sq x) (* x x))")
(list "define" (list "sq" "x") (list "*" "x" "x")))
(scm-test
"classic: let"
(scheme-parse "(let ((x 1) (y 2)) (+ x y))")
(list
"let"
(list (list "x" 1) (list "y" 2))
(list "+" "x" "y")))
(scm-test
"classic: if"
(scheme-parse "(if (zero? n) 1 (* n (fact (- n 1))))")
(list
"if"
(list "zero?" "n")
1
(list "*" "n" (list "fact" (list "-" "n" 1)))))
(define scm-tests-run! (fn () {:total (+ scm-test-pass scm-test-fail) :passed scm-test-pass :failed scm-test-fail :fails scm-test-fails}))