kernel: Phase 1 parser — s-expr reader + 54 tests [consumes-lex]
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 49s

R-1RK lexical syntax: numbers, strings, symbols, #t/#f, (), nested lists,
; comments. Strings wrap as {:knl-string ...} to distinguish from symbols
(bare SX strings). Reader macros deferred to Phase 6 per plan.
Consumes lib/guest/lex.sx character predicates.
This commit is contained in:
2026-05-10 20:42:53 +00:00
parent 57a84b372d
commit cbba642d7f
3 changed files with 378 additions and 3 deletions

240
lib/kernel/parser.sx Normal file
View File

@@ -0,0 +1,240 @@
;; lib/kernel/parser.sx — Kernel s-expression reader.
;;
;; Reads R-1RK lexical syntax: numbers, strings, symbols, booleans (#t/#f),
;; the empty list (), nested lists, and ; line comments. Reader macros
;; (' ` , ,@) deferred to Phase 6 per the plan.
;;
;; Public AST shape:
;; number → SX number
;; #t / #f → SX true / false
;; () → SX empty list (Kernel's nil — the empty list)
;; "..." → {:knl-string "..."} wrapped to distinguish from symbols
;; foo → "foo" bare SX string is a Kernel symbol
;; (a b c) → SX list of forms
;;
;; Public API:
;; (kernel-parse SRC) — first form; errors on extra trailing input
;; (kernel-parse-all SRC) — all top-level forms, as SX list
;; (kernel-string? V) — recognise wrapped string literal
;; (kernel-string-value V) — extract the underlying string
;;
;; Consumes: lib/guest/lex.sx (lex-digit?, lex-whitespace?)
(define kernel-string-make (fn (s) {:knl-string s}))
(define
kernel-string?
(fn (v) (and (dict? v) (string? (get v :knl-string)))))
(define kernel-string-value (fn (v) (get v :knl-string)))
;; Atom delimiters: characters that end a symbol or numeric token.
(define
knl-delim?
(fn
(c)
(or
(nil? c)
(lex-whitespace? c)
(= c "(")
(= c ")")
(= c "\"")
(= c ";"))))
;; Numeric grammar: [+-]? (digit+ ('.' digit+)? | '.' digit+) ([eE][+-]?digit+)?
(define
knl-numeric?
(fn
(s)
(let
((n (string-length s)))
(cond
((= n 0) false)
(:else
(let
((c0 (substring s 0 1)))
(let
((start (if (or (= c0 "+") (= c0 "-")) 1 0)))
(knl-num-body? s start n))))))))
(define
knl-num-body?
(fn
(s start n)
(cond
((>= start n) false)
((= (substring s start (+ start 1)) ".")
(knl-num-need-digits? s (+ start 1) n false))
((lex-digit? (substring s start (+ start 1)))
(knl-num-int-tail? s (+ start 1) n))
(:else false))))
(define
knl-num-int-tail?
(fn
(s i n)
(cond
((>= i n) true)
((lex-digit? (substring s i (+ i 1)))
(knl-num-int-tail? s (+ i 1) n))
((= (substring s i (+ i 1)) ".")
(knl-num-need-digits? s (+ i 1) n true))
((or (= (substring s i (+ i 1)) "e") (= (substring s i (+ i 1)) "E"))
(knl-num-exp-sign? s (+ i 1) n))
(:else false))))
(define
knl-num-need-digits?
(fn
(s i n had-int)
(cond
((>= i n) had-int)
((lex-digit? (substring s i (+ i 1)))
(knl-num-frac-tail? s (+ i 1) n))
(:else false))))
(define
knl-num-frac-tail?
(fn
(s i n)
(cond
((>= i n) true)
((lex-digit? (substring s i (+ i 1)))
(knl-num-frac-tail? s (+ i 1) n))
((or (= (substring s i (+ i 1)) "e") (= (substring s i (+ i 1)) "E"))
(knl-num-exp-sign? s (+ i 1) n))
(:else false))))
(define
knl-num-exp-sign?
(fn
(s i n)
(cond
((>= i n) false)
((or (= (substring s i (+ i 1)) "+") (= (substring s i (+ i 1)) "-"))
(knl-num-exp-digits? s (+ i 1) n false))
(:else (knl-num-exp-digits? s i n false)))))
(define
knl-num-exp-digits?
(fn
(s i n had)
(cond
((>= i n) had)
((lex-digit? (substring s i (+ i 1)))
(knl-num-exp-digits? s (+ i 1) n true))
(:else false))))
;; Reader: a closure over (src, pos). Exposes :read-form and :read-all.
(define
knl-make-reader
(fn
(src)
(let
((pos 0) (n (string-length src)))
(define
at
(fn () (if (< pos n) (substring src pos (+ pos 1)) nil)))
(define adv (fn () (set! pos (+ pos 1))))
(define
skip-line
(fn () (when (and (at) (not (= (at) "\n"))) (adv) (skip-line))))
(define
skip-ws
(fn
()
(cond
((nil? (at)) nil)
((lex-whitespace? (at)) (do (adv) (skip-ws)))
((= (at) ";") (do (adv) (skip-line) (skip-ws)))
(:else nil))))
(define
read-string-body
(fn
(acc)
(cond
((nil? (at)) (error "kernel-parse: unterminated string"))
((= (at) "\"") (do (adv) acc))
((= (at) "\\")
(do
(adv)
(let
((c (at)))
(when (nil? c) (error "kernel-parse: trailing backslash"))
(adv)
(read-string-body
(str
acc
(cond
((= c "n") "\n")
((= c "t") "\t")
((= c "r") "\r")
((= c "\"") "\"")
((= c "\\") "\\")
(:else c)))))))
(:else
(let ((c (at))) (adv) (read-string-body (str acc c)))))))
(define
read-atom-body
(fn
(acc)
(cond
((knl-delim? (at)) acc)
(:else (let ((c (at))) (adv) (read-atom-body (str acc c)))))))
(define
classify-atom
(fn
(s)
(cond
((= s "#t") true)
((= s "#f") false)
((knl-numeric? s) (string->number s))
(:else s))))
(define
read-form
(fn
()
(skip-ws)
(cond
((nil? (at)) :knl-eof)
((= (at) ")") (error "kernel-parse: unexpected ')'"))
((= (at) "(") (do (adv) (read-list (list))))
((= (at) "\"")
(do (adv) (kernel-string-make (read-string-body ""))))
(:else (classify-atom (read-atom-body ""))))))
(define
read-list
(fn
(acc)
(skip-ws)
(cond
((nil? (at)) (error "kernel-parse: unterminated list"))
((= (at) ")") (do (adv) acc))
(:else (read-list (append acc (list (read-form))))))))
(define
read-all
(fn
(acc)
(skip-ws)
(if (nil? (at)) acc (read-all (append acc (list (read-form)))))))
{:read-form read-form :read-all read-all})))
(define
kernel-parse-all
(fn (src) ((get (knl-make-reader src) :read-all) (list))))
(define
kernel-parse
(fn
(src)
(let
((r (knl-make-reader src)))
(let
((form ((get r :read-form))))
(cond
((= form :knl-eof) (error "kernel-parse: empty input"))
(:else
(let
((next ((get r :read-form))))
(if
(= next :knl-eof)
form
(error "kernel-parse: trailing input after first form")))))))))

134
lib/kernel/tests/parse.sx Normal file
View File

@@ -0,0 +1,134 @@
;; lib/kernel/tests/parse.sx — exercises lib/kernel/parser.sx.
(define knl-test-pass 0)
(define knl-test-fail 0)
(define knl-test-fails (list))
(define
knl-test
(fn
(name actual expected)
(if
(= actual expected)
(set! knl-test-pass (+ knl-test-pass 1))
(begin
(set! knl-test-fail (+ knl-test-fail 1))
(append! knl-test-fails {:name name :actual actual :expected expected})))))
;; ── atoms: numbers ────────────────────────────────────────────────
(knl-test "num: integer" (kernel-parse "42") 42)
(knl-test "num: zero" (kernel-parse "0") 0)
(knl-test "num: negative integer" (kernel-parse "-7") -7)
(knl-test "num: positive sign" (kernel-parse "+5") 5)
(knl-test "num: float" (kernel-parse "3.14") 3.14)
(knl-test "num: negative float" (kernel-parse "-2.5") -2.5)
(knl-test "num: leading dot" (kernel-parse ".5") 0.5)
(knl-test "num: exponent" (kernel-parse "1e3") 1000)
(knl-test "num: exponent with sign" (kernel-parse "2.5e-1") 0.25)
(knl-test "num: capital E exponent" (kernel-parse "1E2") 100)
;; ── atoms: booleans ───────────────────────────────────────────────
(knl-test "bool: true" (kernel-parse "#t") true)
(knl-test "bool: false" (kernel-parse "#f") false)
;; ── atoms: empty list (Kernel nil) ────────────────────────────────
(knl-test "nil: ()" (kernel-parse "()") (list))
(knl-test "nil: (= () (list))" (= (kernel-parse "()") (list)) true)
;; ── atoms: symbols ────────────────────────────────────────────────
(knl-test "sym: word" (kernel-parse "foo") "foo")
(knl-test "sym: hyphenated" (kernel-parse "foo-bar") "foo-bar")
(knl-test "sym: dollar-bang" (kernel-parse "$define!") "$define!")
(knl-test "sym: question" (kernel-parse "null?") "null?")
(knl-test "sym: lt-eq" (kernel-parse "<=") "<=")
(knl-test "sym: bare plus" (kernel-parse "+") "+")
(knl-test "sym: bare minus" (kernel-parse "-") "-")
(knl-test "sym: plus-letter" (kernel-parse "+a") "+a")
(knl-test "sym: arrow" (kernel-parse "->") "->")
(knl-test "sym: dot-prefixed" (kernel-parse ".foo") ".foo")
;; ── atoms: strings ────────────────────────────────────────────────
(knl-test "str: empty" (kernel-string-value (kernel-parse "\"\"")) "")
(knl-test
"str: hello"
(kernel-string-value (kernel-parse "\"hello\""))
"hello")
(knl-test "str: predicate" (kernel-string? (kernel-parse "\"x\"")) true)
(knl-test "str: not symbol" (kernel-string? (kernel-parse "x")) false)
(knl-test
"str: escape newline"
(kernel-string-value (kernel-parse "\"a\\nb\""))
"a\nb")
(knl-test
"str: escape tab"
(kernel-string-value (kernel-parse "\"a\\tb\""))
"a\tb")
(knl-test
"str: escape quote"
(kernel-string-value (kernel-parse "\"a\\\"b\""))
"a\"b")
(knl-test
"str: escape backslash"
(kernel-string-value (kernel-parse "\"a\\\\b\""))
"a\\b")
;; ── lists ─────────────────────────────────────────────────────────
(knl-test "list: flat" (kernel-parse "(a b c)") (list "a" "b" "c"))
(knl-test
"list: nested"
(kernel-parse "(a (b c) d)")
(list "a" (list "b" "c") "d"))
(knl-test
"list: deeply nested"
(kernel-parse "(((x)))")
(list (list (list "x"))))
(knl-test
"list: mixed atoms"
(kernel-parse "(1 #t foo)")
(list 1 true "foo"))
(knl-test
"list: empty inside"
(kernel-parse "(a () b)")
(list "a" (list) "b"))
;; ── whitespace + comments ─────────────────────────────────────────
(knl-test "ws: leading" (kernel-parse " 42") 42)
(knl-test "ws: trailing" (kernel-parse "42 ") 42)
(knl-test "ws: tabs/newlines" (kernel-parse "\n\t 42 \n") 42)
(knl-test "comment: line" (kernel-parse "; nope\n42") 42)
(knl-test "comment: trailing" (kernel-parse "42 ; tail") 42)
(knl-test
"comment: inside list"
(kernel-parse "(a ; mid\n b)")
(list "a" "b"))
;; ── parse-all ─────────────────────────────────────────────────────
(knl-test "all: empty input" (kernel-parse-all "") (list))
(knl-test "all: only whitespace" (kernel-parse-all " ") (list))
(knl-test "all: only comment" (kernel-parse-all "; nope") (list))
(knl-test
"all: three forms"
(kernel-parse-all "1 2 3")
(list 1 2 3))
(knl-test
"all: mixed"
(kernel-parse-all "($if #t 1 2) foo")
(list (list "$if" true 1 2) "foo"))
;; ── classic Kernel programs (smoke) ───────────────────────────────
(knl-test
"klisp: vau form"
(kernel-parse "($vau (x e) e (eval x e))")
(list "$vau" (list "x" "e") "e" (list "eval" "x" "e")))
(knl-test
"klisp: define lambda"
(kernel-parse "($define! sq ($lambda (x) (* x x)))")
(list "$define!" "sq" (list "$lambda" (list "x") (list "*" "x" "x"))))
;; ── round-trip identity for primitive symbols ─────────────────────
(knl-test "identity: $vau" (kernel-parse "$vau") "$vau")
(knl-test "identity: $lambda" (kernel-parse "$lambda") "$lambda")
(knl-test "identity: wrap" (kernel-parse "wrap") "wrap")
(knl-test "identity: unwrap" (kernel-parse "unwrap") "unwrap")
(define knl-tests-run! (fn () {:total (+ knl-test-pass knl-test-fail) :passed knl-test-pass :failed knl-test-fail :fails knl-test-fails}))

View File

@@ -56,9 +56,9 @@ The whole interesting thing: there are no special forms hardcoded in the evaluat
## Roadmap
### Phase 1 — Parser
- [ ] S-expression reader with the standard atoms (number, string, symbol, boolean, nil) and lists.
- [x] S-expression reader with the standard atoms (number, string, symbol, boolean, nil) and lists.
- [ ] Reader macros optional; defer to Phase 6.
- [ ] Tests in `lib/kernel/tests/parse.sx`.
- [x] Tests in `lib/kernel/tests/parse.sx`.
### Phase 2 — Core evaluator with first-class environments
- [ ] `kernel-eval expr env` — primary entry, walks AST, threads env as a value.
@@ -108,7 +108,8 @@ The whole interesting thing: there are no special forms hardcoded in the evaluat
- Klisp implementation (Andres Navarro) — pragmatic reference.
## Progress log
_(awaiting Phase 1 — depends on stable env-as-value substrate state)_
- 2026-05-10 — Phase 1 parser landed. `lib/kernel/parser.sx` reads R-1RK lexical syntax: numbers (int/float/exp), strings (with escapes), symbols (permissive — anything non-delimiting), booleans `#t`/`#f`, the empty list `()`, nested lists, and `;` line comments. Reader macros (`'` `,` `,@`) deferred per plan. AST: numbers/booleans/lists pass through; strings are wrapped as `{:knl-string …}` to distinguish from symbols which are bare SX strings. 54 tests in `lib/kernel/tests/parse.sx` pass via `sx_server.exe` epoch protocol. chisel: consumes-lex (uses `lex-digit?` and `lex-whitespace?` from `lib/guest/lex.sx` — pratt deliberately not consumed because Kernel is plain s-expressions, no precedence climbing).
## Blockers
_(none yet — main risk is substrate gap discovery during Phase 2)_