From cbba642d7f83d1f2383fcb0361f2da467a741640 Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 20:42:53 +0000 Subject: [PATCH] =?UTF-8?q?kernel:=20Phase=201=20parser=20=E2=80=94=20s-ex?= =?UTF-8?q?pr=20reader=20+=2054=20tests=20[consumes-lex]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit R-1RK lexical syntax: numbers, strings, symbols, #t/#f, (), nested lists, ; comments. Strings wrap as {:knl-string ...} to distinguish from symbols (bare SX strings). Reader macros deferred to Phase 6 per plan. Consumes lib/guest/lex.sx character predicates. --- lib/kernel/parser.sx | 240 ++++++++++++++++++++++++++++++++++++++ lib/kernel/tests/parse.sx | 134 +++++++++++++++++++++ plans/kernel-on-sx.md | 7 +- 3 files changed, 378 insertions(+), 3 deletions(-) create mode 100644 lib/kernel/parser.sx create mode 100644 lib/kernel/tests/parse.sx diff --git a/lib/kernel/parser.sx b/lib/kernel/parser.sx new file mode 100644 index 00000000..bd1d76c1 --- /dev/null +++ b/lib/kernel/parser.sx @@ -0,0 +1,240 @@ +;; lib/kernel/parser.sx — Kernel s-expression reader. +;; +;; Reads R-1RK lexical syntax: numbers, strings, symbols, booleans (#t/#f), +;; the empty list (), nested lists, and ; line comments. Reader macros +;; (' ` , ,@) deferred to Phase 6 per the plan. +;; +;; Public AST shape: +;; number → SX number +;; #t / #f → SX true / false +;; () → SX empty list (Kernel's nil — the empty list) +;; "..." → {:knl-string "..."} wrapped to distinguish from symbols +;; foo → "foo" bare SX string is a Kernel symbol +;; (a b c) → SX list of forms +;; +;; Public API: +;; (kernel-parse SRC) — first form; errors on extra trailing input +;; (kernel-parse-all SRC) — all top-level forms, as SX list +;; (kernel-string? V) — recognise wrapped string literal +;; (kernel-string-value V) — extract the underlying string +;; +;; Consumes: lib/guest/lex.sx (lex-digit?, lex-whitespace?) + +(define kernel-string-make (fn (s) {:knl-string s})) +(define + kernel-string? + (fn (v) (and (dict? v) (string? (get v :knl-string))))) +(define kernel-string-value (fn (v) (get v :knl-string))) + +;; Atom delimiters: characters that end a symbol or numeric token. +(define + knl-delim? + (fn + (c) + (or + (nil? c) + (lex-whitespace? c) + (= c "(") + (= c ")") + (= c "\"") + (= c ";")))) + +;; Numeric grammar: [+-]? (digit+ ('.' digit+)? | '.' digit+) ([eE][+-]?digit+)? +(define + knl-numeric? + (fn + (s) + (let + ((n (string-length s))) + (cond + ((= n 0) false) + (:else + (let + ((c0 (substring s 0 1))) + (let + ((start (if (or (= c0 "+") (= c0 "-")) 1 0))) + (knl-num-body? s start n)))))))) + +(define + knl-num-body? + (fn + (s start n) + (cond + ((>= start n) false) + ((= (substring s start (+ start 1)) ".") + (knl-num-need-digits? s (+ start 1) n false)) + ((lex-digit? (substring s start (+ start 1))) + (knl-num-int-tail? s (+ start 1) n)) + (:else false)))) + +(define + knl-num-int-tail? + (fn + (s i n) + (cond + ((>= i n) true) + ((lex-digit? (substring s i (+ i 1))) + (knl-num-int-tail? s (+ i 1) n)) + ((= (substring s i (+ i 1)) ".") + (knl-num-need-digits? s (+ i 1) n true)) + ((or (= (substring s i (+ i 1)) "e") (= (substring s i (+ i 1)) "E")) + (knl-num-exp-sign? s (+ i 1) n)) + (:else false)))) + +(define + knl-num-need-digits? + (fn + (s i n had-int) + (cond + ((>= i n) had-int) + ((lex-digit? (substring s i (+ i 1))) + (knl-num-frac-tail? s (+ i 1) n)) + (:else false)))) + +(define + knl-num-frac-tail? + (fn + (s i n) + (cond + ((>= i n) true) + ((lex-digit? (substring s i (+ i 1))) + (knl-num-frac-tail? s (+ i 1) n)) + ((or (= (substring s i (+ i 1)) "e") (= (substring s i (+ i 1)) "E")) + (knl-num-exp-sign? s (+ i 1) n)) + (:else false)))) + +(define + knl-num-exp-sign? + (fn + (s i n) + (cond + ((>= i n) false) + ((or (= (substring s i (+ i 1)) "+") (= (substring s i (+ i 1)) "-")) + (knl-num-exp-digits? s (+ i 1) n false)) + (:else (knl-num-exp-digits? s i n false))))) + +(define + knl-num-exp-digits? + (fn + (s i n had) + (cond + ((>= i n) had) + ((lex-digit? (substring s i (+ i 1))) + (knl-num-exp-digits? s (+ i 1) n true)) + (:else false)))) + +;; Reader: a closure over (src, pos). Exposes :read-form and :read-all. +(define + knl-make-reader + (fn + (src) + (let + ((pos 0) (n (string-length src))) + (define + at + (fn () (if (< pos n) (substring src pos (+ pos 1)) nil))) + (define adv (fn () (set! pos (+ pos 1)))) + (define + skip-line + (fn () (when (and (at) (not (= (at) "\n"))) (adv) (skip-line)))) + (define + skip-ws + (fn + () + (cond + ((nil? (at)) nil) + ((lex-whitespace? (at)) (do (adv) (skip-ws))) + ((= (at) ";") (do (adv) (skip-line) (skip-ws))) + (:else nil)))) + (define + read-string-body + (fn + (acc) + (cond + ((nil? (at)) (error "kernel-parse: unterminated string")) + ((= (at) "\"") (do (adv) acc)) + ((= (at) "\\") + (do + (adv) + (let + ((c (at))) + (when (nil? c) (error "kernel-parse: trailing backslash")) + (adv) + (read-string-body + (str + acc + (cond + ((= c "n") "\n") + ((= c "t") "\t") + ((= c "r") "\r") + ((= c "\"") "\"") + ((= c "\\") "\\") + (:else c))))))) + (:else + (let ((c (at))) (adv) (read-string-body (str acc c))))))) + (define + read-atom-body + (fn + (acc) + (cond + ((knl-delim? (at)) acc) + (:else (let ((c (at))) (adv) (read-atom-body (str acc c))))))) + (define + classify-atom + (fn + (s) + (cond + ((= s "#t") true) + ((= s "#f") false) + ((knl-numeric? s) (string->number s)) + (:else s)))) + (define + read-form + (fn + () + (skip-ws) + (cond + ((nil? (at)) :knl-eof) + ((= (at) ")") (error "kernel-parse: unexpected ')'")) + ((= (at) "(") (do (adv) (read-list (list)))) + ((= (at) "\"") + (do (adv) (kernel-string-make (read-string-body "")))) + (:else (classify-atom (read-atom-body "")))))) + (define + read-list + (fn + (acc) + (skip-ws) + (cond + ((nil? (at)) (error "kernel-parse: unterminated list")) + ((= (at) ")") (do (adv) acc)) + (:else (read-list (append acc (list (read-form)))))))) + (define + read-all + (fn + (acc) + (skip-ws) + (if (nil? (at)) acc (read-all (append acc (list (read-form))))))) + {:read-form read-form :read-all read-all}))) + +(define + kernel-parse-all + (fn (src) ((get (knl-make-reader src) :read-all) (list)))) + +(define + kernel-parse + (fn + (src) + (let + ((r (knl-make-reader src))) + (let + ((form ((get r :read-form)))) + (cond + ((= form :knl-eof) (error "kernel-parse: empty input")) + (:else + (let + ((next ((get r :read-form)))) + (if + (= next :knl-eof) + form + (error "kernel-parse: trailing input after first form"))))))))) diff --git a/lib/kernel/tests/parse.sx b/lib/kernel/tests/parse.sx new file mode 100644 index 00000000..fdfd7850 --- /dev/null +++ b/lib/kernel/tests/parse.sx @@ -0,0 +1,134 @@ +;; lib/kernel/tests/parse.sx — exercises lib/kernel/parser.sx. + +(define knl-test-pass 0) +(define knl-test-fail 0) +(define knl-test-fails (list)) + +(define + knl-test + (fn + (name actual expected) + (if + (= actual expected) + (set! knl-test-pass (+ knl-test-pass 1)) + (begin + (set! knl-test-fail (+ knl-test-fail 1)) + (append! knl-test-fails {:name name :actual actual :expected expected}))))) + +;; ── atoms: numbers ──────────────────────────────────────────────── +(knl-test "num: integer" (kernel-parse "42") 42) +(knl-test "num: zero" (kernel-parse "0") 0) +(knl-test "num: negative integer" (kernel-parse "-7") -7) +(knl-test "num: positive sign" (kernel-parse "+5") 5) +(knl-test "num: float" (kernel-parse "3.14") 3.14) +(knl-test "num: negative float" (kernel-parse "-2.5") -2.5) +(knl-test "num: leading dot" (kernel-parse ".5") 0.5) +(knl-test "num: exponent" (kernel-parse "1e3") 1000) +(knl-test "num: exponent with sign" (kernel-parse "2.5e-1") 0.25) +(knl-test "num: capital E exponent" (kernel-parse "1E2") 100) + +;; ── atoms: booleans ─────────────────────────────────────────────── +(knl-test "bool: true" (kernel-parse "#t") true) +(knl-test "bool: false" (kernel-parse "#f") false) + +;; ── atoms: empty list (Kernel nil) ──────────────────────────────── +(knl-test "nil: ()" (kernel-parse "()") (list)) +(knl-test "nil: (= () (list))" (= (kernel-parse "()") (list)) true) + +;; ── atoms: symbols ──────────────────────────────────────────────── +(knl-test "sym: word" (kernel-parse "foo") "foo") +(knl-test "sym: hyphenated" (kernel-parse "foo-bar") "foo-bar") +(knl-test "sym: dollar-bang" (kernel-parse "$define!") "$define!") +(knl-test "sym: question" (kernel-parse "null?") "null?") +(knl-test "sym: lt-eq" (kernel-parse "<=") "<=") +(knl-test "sym: bare plus" (kernel-parse "+") "+") +(knl-test "sym: bare minus" (kernel-parse "-") "-") +(knl-test "sym: plus-letter" (kernel-parse "+a") "+a") +(knl-test "sym: arrow" (kernel-parse "->") "->") +(knl-test "sym: dot-prefixed" (kernel-parse ".foo") ".foo") + +;; ── atoms: strings ──────────────────────────────────────────────── +(knl-test "str: empty" (kernel-string-value (kernel-parse "\"\"")) "") +(knl-test + "str: hello" + (kernel-string-value (kernel-parse "\"hello\"")) + "hello") +(knl-test "str: predicate" (kernel-string? (kernel-parse "\"x\"")) true) +(knl-test "str: not symbol" (kernel-string? (kernel-parse "x")) false) +(knl-test + "str: escape newline" + (kernel-string-value (kernel-parse "\"a\\nb\"")) + "a\nb") +(knl-test + "str: escape tab" + (kernel-string-value (kernel-parse "\"a\\tb\"")) + "a\tb") +(knl-test + "str: escape quote" + (kernel-string-value (kernel-parse "\"a\\\"b\"")) + "a\"b") +(knl-test + "str: escape backslash" + (kernel-string-value (kernel-parse "\"a\\\\b\"")) + "a\\b") + +;; ── lists ───────────────────────────────────────────────────────── +(knl-test "list: flat" (kernel-parse "(a b c)") (list "a" "b" "c")) +(knl-test + "list: nested" + (kernel-parse "(a (b c) d)") + (list "a" (list "b" "c") "d")) +(knl-test + "list: deeply nested" + (kernel-parse "(((x)))") + (list (list (list "x")))) +(knl-test + "list: mixed atoms" + (kernel-parse "(1 #t foo)") + (list 1 true "foo")) +(knl-test + "list: empty inside" + (kernel-parse "(a () b)") + (list "a" (list) "b")) + +;; ── whitespace + comments ───────────────────────────────────────── +(knl-test "ws: leading" (kernel-parse " 42") 42) +(knl-test "ws: trailing" (kernel-parse "42 ") 42) +(knl-test "ws: tabs/newlines" (kernel-parse "\n\t 42 \n") 42) +(knl-test "comment: line" (kernel-parse "; nope\n42") 42) +(knl-test "comment: trailing" (kernel-parse "42 ; tail") 42) +(knl-test + "comment: inside list" + (kernel-parse "(a ; mid\n b)") + (list "a" "b")) + +;; ── parse-all ───────────────────────────────────────────────────── +(knl-test "all: empty input" (kernel-parse-all "") (list)) +(knl-test "all: only whitespace" (kernel-parse-all " ") (list)) +(knl-test "all: only comment" (kernel-parse-all "; nope") (list)) +(knl-test + "all: three forms" + (kernel-parse-all "1 2 3") + (list 1 2 3)) +(knl-test + "all: mixed" + (kernel-parse-all "($if #t 1 2) foo") + (list (list "$if" true 1 2) "foo")) + +;; ── classic Kernel programs (smoke) ─────────────────────────────── +(knl-test + "klisp: vau form" + (kernel-parse "($vau (x e) e (eval x e))") + (list "$vau" (list "x" "e") "e" (list "eval" "x" "e"))) +(knl-test + "klisp: define lambda" + (kernel-parse "($define! sq ($lambda (x) (* x x)))") + (list "$define!" "sq" (list "$lambda" (list "x") (list "*" "x" "x")))) + +;; ── round-trip identity for primitive symbols ───────────────────── +(knl-test "identity: $vau" (kernel-parse "$vau") "$vau") +(knl-test "identity: $lambda" (kernel-parse "$lambda") "$lambda") +(knl-test "identity: wrap" (kernel-parse "wrap") "wrap") +(knl-test "identity: unwrap" (kernel-parse "unwrap") "unwrap") + +(define knl-tests-run! (fn () {:total (+ knl-test-pass knl-test-fail) :passed knl-test-pass :failed knl-test-fail :fails knl-test-fails})) diff --git a/plans/kernel-on-sx.md b/plans/kernel-on-sx.md index bb9a2de1..a9daf7a3 100644 --- a/plans/kernel-on-sx.md +++ b/plans/kernel-on-sx.md @@ -56,9 +56,9 @@ The whole interesting thing: there are no special forms hardcoded in the evaluat ## Roadmap ### Phase 1 — Parser -- [ ] S-expression reader with the standard atoms (number, string, symbol, boolean, nil) and lists. +- [x] S-expression reader with the standard atoms (number, string, symbol, boolean, nil) and lists. - [ ] Reader macros optional; defer to Phase 6. -- [ ] Tests in `lib/kernel/tests/parse.sx`. +- [x] Tests in `lib/kernel/tests/parse.sx`. ### Phase 2 — Core evaluator with first-class environments - [ ] `kernel-eval expr env` — primary entry, walks AST, threads env as a value. @@ -108,7 +108,8 @@ The whole interesting thing: there are no special forms hardcoded in the evaluat - Klisp implementation (Andres Navarro) — pragmatic reference. ## Progress log -_(awaiting Phase 1 — depends on stable env-as-value substrate state)_ + +- 2026-05-10 — Phase 1 parser landed. `lib/kernel/parser.sx` reads R-1RK lexical syntax: numbers (int/float/exp), strings (with escapes), symbols (permissive — anything non-delimiting), booleans `#t`/`#f`, the empty list `()`, nested lists, and `;` line comments. Reader macros (`'` `,` `,@`) deferred per plan. AST: numbers/booleans/lists pass through; strings are wrapped as `{:knl-string …}` to distinguish from symbols which are bare SX strings. 54 tests in `lib/kernel/tests/parse.sx` pass via `sx_server.exe` epoch protocol. chisel: consumes-lex (uses `lex-digit?` and `lex-whitespace?` from `lib/guest/lex.sx` — pratt deliberately not consumed because Kernel is plain s-expressions, no precedence climbing). ## Blockers _(none yet — main risk is substrate gap discovery during Phase 2)_