From c919d9a0d7510ce9c8c7fabe8842a2ab72c24755 Mon Sep 17 00:00:00 2001 From: giles Date: Wed, 13 May 2026 19:58:30 +0000 Subject: [PATCH] =?UTF-8?q?scheme:=20Phase=201=20parser=20=E2=80=94=20R7RS?= =?UTF-8?q?=20lexical=20reader=20+=2062=20tests=20[consumes-lex]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit lib/scheme/parser.sx — reader for R7RS-small lexical syntax: - numbers (int/float/exp) - booleans #t / #f / #true / #false - strings with standard escapes - symbols (permissive — any non-delimiter) - characters #\c, #\space, #\newline, #\tab, etc. - vectors #(...) - proper lists (dotted-pair deferred to Phase 3 with lambda rest-args) - reader macros: 'X `X ,X ,@X → (quote X) (quasiquote X) etc. (Scheme conventions — lowercase, no $ prefix) - line comments ; - nestable block comments #| ... |# - datum comments #; AST shape mirrors Kernel: numbers/booleans/lists pass through; strings wrapped as {:scm-string ...} to distinguish from symbols (bare SX strings); chars as {:scm-char ...}; vectors as {:scm-vector (list ...)}. 62 tests in lib/scheme/tests/parse.sx cover atom kinds, escape sequences, quote/quasiquote/unquote/unquote-splicing, all three comment flavours, and classic Scheme idioms (lambda, define, let, if-cond). Note: SX cond branches evaluate only the LAST expression, so multi-mutation branches need explicit (do ...) or (begin ...) wrappers — caught during block-comment debugging. chisel: consumes-lex (lex-digit?, lex-whitespace? from lib/guest/lex.sx); pratt not consumed (no operator precedence in Scheme). --- lib/scheme/parser.sx | Bin 0 -> 10898 bytes lib/scheme/tests/parse.sx | 177 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 177 insertions(+) create mode 100644 lib/scheme/parser.sx create mode 100644 lib/scheme/tests/parse.sx diff --git a/lib/scheme/parser.sx b/lib/scheme/parser.sx new file mode 100644 index 0000000000000000000000000000000000000000..bee741a1cf2f1bc501c23241847fd3b18150e93e GIT binary patch literal 10898 zcmd5?TW{RP74BOL{e`}qt{%jNMagj<3P^QiBgsR7qA=_tFc1Y0mpiK^#3i>RR~N)a z^OPS@^cUur^gCy+9L_4Tn};qEi{zX+m+xHW%#i2jR9B1BzFez~I^7msuey`|o__i1 zKlJv`Z{Owprl@PG)qPbKHFc_3g5co%9RJZ<-1XG#HVf7D$FykMS``hh%5~8dip`Mmc_8!1kVonA#-gMcrI#1i@Ggu90hZ@I{ZKs zDIiwEQBQ4CKd`7qsU=9-rc_iHo5ix|>8={qu(KZ^{JO{E#z}qs?jO{zi>(-%+aEI` zgE}tgm!JPl@7|jSdhl?dQ#u^FowB2HeNKlf9#|DM^fUA1Mo-J;%#VL&Ol`KqgE&VM z%z)F(5&U@8mz!Msmt^Q8kKJ_zLq~mUUC#b|pasA+>Rpz?M9a}^IAmy6&V&Vz_K2EV zYX>!> z_r`>)G)>W05#e!)aM#3CuPj+0(op67wysJ*Y^1Z2Z*~hjqt|c#YOK>$0a1^tK=0mu zZNh_V!>Z~4Dcg5Whll<8F2UN;q#bg?Hz2h&l5iaNz1zb$H{2iUL*qIfw-@!2&farFZm zpoNX6b3lstQ_2iL{K)YeQ|kzf==H-~)u_2#y1rxo^SrOyrr&K;e+JDLr`J1Fg;SJX zF31`F=F9538ZM5x9Orjy7$@0qF+VuSmTHBBAfu61Jl_;Iin3J$P3I;hOepss=W{#h z0Ldox-{kTI9>}EIIb~ctvt?Bd7sR&~{_I)}bW1@6nDfuF95Xc7IG(Aw6B7i~gaJ|z z(&~$AK2dp|lT=;g>mW%fZstwYHooTR~qnZ=zdWoh@E{seh z;SnOLoa?`BCx7vzsp<=?pY}G2>R{w0l{Cv{j#BRD(f#EtdT<`yA4T_9(fx6B|6wK} z&Di|in0nn6n@!Q35q3j@)~xFdluC0Uz29%1d^&|$Oo*b6EWWT;#Mm)4VdFH2CV|jX~1{{OZiS& zmG+xavH8_FPMlMnV2u)!v1>&rtc)Gtc|93;%~I>?`RX7XIXC33bY$)w{}$nDbPRl< z?zeegT{p%Tu`y5ZLl`#`SJ1jx<^W7f4s%*!SX?q9VE}z&Fb*znZNiWhDtj1JpJ9n- zF|p$@{eOdy@{;$&)c>jPUG#hsV51tInc})PZcWAAJI-Y|9oMot*7B+79vqL_R9)?X+Hzp~}t3Pi&eLF;ETY(no@KQSPV_cXm z=`r!1G+yDEfox?%yLm^OG8t<(MrbCiQKn|(79d&JWzdM@;mam5j?)eq8xmuk8g?Bf zVQQBcZgzFegTG6U?HfYJr?8vF#D66wcDqHpgEHySMN!^3bN`T-uKE(^AUyq#iP54i zx(yHh-VbU=qQ%rL1l-8SRSLaZS-1TT8~gSaCr?@5mB+Mg`+4j%ZsBu%;1zle^u}qv z3(7Ju;C?I7qk-IIoY|$h{3~p8hoI(N8W?c@^cU8Lm)|gpyHcKFrR70xOtfu=S<3{Xw`>}@5h|4BZrph-VlmFba4@n3 zx{qw(%~zytWJ_A5XMsv%272=oA|k;sHc5MwgTqcvM#o+6bbBndCeUVKG;nq}HVlpK z<${A931Q&kohT~b1sgvNskG4&?%4EB=s9Vu2UBw;b8JxkMvibHlMt8+iWp`tyyiSg z_Jt0p2=AhF2M7Y;#OvXaZpss`nEIwD+Q*PYxtSq{9(XV4;4YQ&jk18gXY63vEXJmgZ z5|(_DKr`LA_?|~I4gY0lfl#`QOWy#6B>LJAq6r$cLVF3t_p{)jX+I@g51!Ki@H33? zf(d=!;>Yckp_6vbw?=l<|26fE+YSDWAf6oj@HCDv)Epa6cAbdbE3PCs4I!m}hIKM! z3!EbUuL;YCJo^056u>CSp}H<1k6GXpd9wZ`a-pwQ4>?bD@JVBaGhB&Hz-3OL_~a9( z09;n}ncP=6!NPk`K9sZSahbM$NP>m;gzHd{yM~2!qV-fThEeCW@#9JLV^W(8BT`|z zY$wk!VPD?s;$n*%Crz+0@gy5TeC-TpLG~wm|IjcVFbb|FwGu<0u3*9-@UcoG+0q1l zg^dl-MkUP|?~bjJ#vac75Fkk_d8vTy;}jk<%O7%Ec7lRM)-;aha9_V<5XSQ2QQj(Y z%*93;=234T6ZsV$^z_%$$tKry@5jx9Vq7Rm(jlT=8pP`W0J3Kpsae^4P?e*G5C zsxcM-5rXbWd$GW8E`1jv>;lFeg!J ztftVCcVN_Cs^cu}^G1M1VHsEun+pe^)ul{e8e1*?Z~oLNePx@Mm6n;}XBw&yR~7GK3BCwO`F z&WOQ)m7ytOh5aI9R1f!If%k_g2L5x6Cvov_9bMohR=sH@V(NWK=wW&F>kFPXeEMC; zBib7%Z4<^{*Ua4KC*yf>B$+v*Udt>$aS3x%lWD|iyI70TDC!sW*#$PDjy?t5D3j8Q z#ptAl71*CBl4>pTOh*wJb0?}K6_9g8F&_>9Kc_ZfKb!J|}c zeN-h{=#>SZ=R83-Bm2iQnZ!j-zTV5_NyYmhOHQhrD0@1C({Dw+yoedU+hEaL-V>^@>C9(hj;1v4>DYuAyt zTy${h9Icd`oEj9C#MOFFP1axE$wyS") "->") +(scm-test "sym: lt-eq" (scheme-parse "<=") "<=") +(scm-test "sym: bare plus" (scheme-parse "+") "+") +(scm-test "sym: bare minus" (scheme-parse "-") "-") +(scm-test "sym: dot-prefixed" (scheme-parse ".foo") ".foo") + +;; ── characters ──────────────────────────────────────────────────── +(scm-test "char: single" (scheme-char-value (scheme-parse "#\\a")) "a") +(scm-test "char: space" (scheme-char-value (scheme-parse "#\\space")) " ") +(scm-test "char: newline" (scheme-char-value (scheme-parse "#\\newline")) "\n") +(scm-test "char: tab" (scheme-char-value (scheme-parse "#\\tab")) "\t") +(scm-test "char: predicate" (scheme-char? (scheme-parse "#\\x")) true) +(scm-test "char: digit" (scheme-char-value (scheme-parse "#\\5")) "5") + +;; ── vectors ─────────────────────────────────────────────────────── +(scm-test "vec: empty" (scheme-vector-elements (scheme-parse "#()")) (list)) +(scm-test + "vec: numbers" + (scheme-vector-elements (scheme-parse "#(1 2 3)")) + (list 1 2 3)) +(scm-test "vec: predicate" (scheme-vector? (scheme-parse "#(1)")) true) +(scm-test "vec: not list" (scheme-vector? (scheme-parse "(1)")) false) +;; Nested vector: SX `=` doesn't deep-compare dicts-with-list-values +;; reliably under this CEK path, so check structure piecewise. +(scm-test "vec: nested first" + (first (scheme-vector-elements (scheme-parse "#(a #(b c) d)"))) "a") +(scm-test "vec: nested second is vector" + (scheme-vector? + (nth (scheme-vector-elements (scheme-parse "#(a #(b c) d)")) 1)) + true) +(scm-test "vec: nested second elements" + (scheme-vector-elements + (nth (scheme-vector-elements (scheme-parse "#(a #(b c) d)")) 1)) + (list "b" "c")) + +;; ── lists ───────────────────────────────────────────────────────── +(scm-test "list: empty" (scheme-parse "()") (list)) +(scm-test "list: flat" (scheme-parse "(a b c)") (list "a" "b" "c")) +(scm-test + "list: nested" + (scheme-parse "(a (b c) d)") + (list "a" (list "b" "c") "d")) +(scm-test + "list: mixed atoms" + (scheme-parse "(1 #t foo)") + (list 1 true "foo")) + +;; ── reader macros ───────────────────────────────────────────────── +(scm-test "quote: 'foo" (scheme-parse "'foo") (list "quote" "foo")) +(scm-test + "quote: '(a b c)" + (scheme-parse "'(a b c)") + (list "quote" (list "a" "b" "c"))) +(scm-test "quasiquote: `x" (scheme-parse "`x") (list "quasiquote" "x")) +(scm-test "unquote: ,x" (scheme-parse ",x") (list "unquote" "x")) +(scm-test + "unquote-splicing: ,@x" + (scheme-parse ",@x") + (list "unquote-splicing" "x")) +(scm-test + "qq mix" + (scheme-parse "`(a ,b ,@c)") + (list + "quasiquote" + (list "a" (list "unquote" "b") (list "unquote-splicing" "c")))) + +;; ── comments ────────────────────────────────────────────────────── +(scm-test "comment: line" (scheme-parse "; nope\n42") 42) +(scm-test "comment: trailing" (scheme-parse "42 ; tail") 42) +(scm-test + "comment: inside list" + (scheme-parse "(a ; mid\n b)") + (list "a" "b")) +(scm-test "comment: block simple" (scheme-parse "#| skip |# 42") 42) +(scm-test + "comment: block nested" + (scheme-parse "#| outer #| inner |# done |# 42") + 42) +(scm-test "comment: datum #;" (scheme-parse "#;skipme 42") 42) +(scm-test + "comment: datum skips list" + (scheme-parse "#;(1 2 3) 42") + 42) + +;; ── parse-all ───────────────────────────────────────────────────── +(scm-test "all: empty" (scheme-parse-all "") (list)) +(scm-test + "all: three forms" + (scheme-parse-all "1 2 3") + (list 1 2 3)) +(scm-test + "all: mixed" + (scheme-parse-all "(if #t 1 2) foo") + (list (list "if" true 1 2) "foo")) + +;; ── classic Scheme idioms ───────────────────────────────────────── +(scm-test + "classic: lambda" + (scheme-parse "(lambda (x) (+ x 1))") + (list "lambda" (list "x") (list "+" "x" 1))) +(scm-test + "classic: define" + (scheme-parse "(define (sq x) (* x x))") + (list "define" (list "sq" "x") (list "*" "x" "x"))) +(scm-test + "classic: let" + (scheme-parse "(let ((x 1) (y 2)) (+ x y))") + (list + "let" + (list (list "x" 1) (list "y" 2)) + (list "+" "x" "y"))) +(scm-test + "classic: if" + (scheme-parse "(if (zero? n) 1 (* n (fact (- n 1))))") + (list + "if" + (list "zero?" "n") + 1 + (list "*" "n" (list "fact" (list "-" "n" 1))))) + +(define scm-tests-run! (fn () {:total (+ scm-test-pass scm-test-fail) :passed scm-test-pass :failed scm-test-fail :fails scm-test-fails}))