kernel: Phase 1 parser — s-expr reader + 54 tests [consumes-lex]
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 49s
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 49s
R-1RK lexical syntax: numbers, strings, symbols, #t/#f, (), nested lists,
; comments. Strings wrap as {:knl-string ...} to distinguish from symbols
(bare SX strings). Reader macros deferred to Phase 6 per plan.
Consumes lib/guest/lex.sx character predicates.
This commit is contained in:
240
lib/kernel/parser.sx
Normal file
240
lib/kernel/parser.sx
Normal file
@@ -0,0 +1,240 @@
|
||||
;; lib/kernel/parser.sx — Kernel s-expression reader.
|
||||
;;
|
||||
;; Reads R-1RK lexical syntax: numbers, strings, symbols, booleans (#t/#f),
|
||||
;; the empty list (), nested lists, and ; line comments. Reader macros
|
||||
;; (' ` , ,@) deferred to Phase 6 per the plan.
|
||||
;;
|
||||
;; Public AST shape:
|
||||
;; number → SX number
|
||||
;; #t / #f → SX true / false
|
||||
;; () → SX empty list (Kernel's nil — the empty list)
|
||||
;; "..." → {:knl-string "..."} wrapped to distinguish from symbols
|
||||
;; foo → "foo" bare SX string is a Kernel symbol
|
||||
;; (a b c) → SX list of forms
|
||||
;;
|
||||
;; Public API:
|
||||
;; (kernel-parse SRC) — first form; errors on extra trailing input
|
||||
;; (kernel-parse-all SRC) — all top-level forms, as SX list
|
||||
;; (kernel-string? V) — recognise wrapped string literal
|
||||
;; (kernel-string-value V) — extract the underlying string
|
||||
;;
|
||||
;; Consumes: lib/guest/lex.sx (lex-digit?, lex-whitespace?)
|
||||
|
||||
(define kernel-string-make (fn (s) {:knl-string s}))
|
||||
(define
|
||||
kernel-string?
|
||||
(fn (v) (and (dict? v) (string? (get v :knl-string)))))
|
||||
(define kernel-string-value (fn (v) (get v :knl-string)))
|
||||
|
||||
;; Atom delimiters: characters that end a symbol or numeric token.
|
||||
(define
|
||||
knl-delim?
|
||||
(fn
|
||||
(c)
|
||||
(or
|
||||
(nil? c)
|
||||
(lex-whitespace? c)
|
||||
(= c "(")
|
||||
(= c ")")
|
||||
(= c "\"")
|
||||
(= c ";"))))
|
||||
|
||||
;; Numeric grammar: [+-]? (digit+ ('.' digit+)? | '.' digit+) ([eE][+-]?digit+)?
|
||||
(define
|
||||
knl-numeric?
|
||||
(fn
|
||||
(s)
|
||||
(let
|
||||
((n (string-length s)))
|
||||
(cond
|
||||
((= n 0) false)
|
||||
(:else
|
||||
(let
|
||||
((c0 (substring s 0 1)))
|
||||
(let
|
||||
((start (if (or (= c0 "+") (= c0 "-")) 1 0)))
|
||||
(knl-num-body? s start n))))))))
|
||||
|
||||
(define
|
||||
knl-num-body?
|
||||
(fn
|
||||
(s start n)
|
||||
(cond
|
||||
((>= start n) false)
|
||||
((= (substring s start (+ start 1)) ".")
|
||||
(knl-num-need-digits? s (+ start 1) n false))
|
||||
((lex-digit? (substring s start (+ start 1)))
|
||||
(knl-num-int-tail? s (+ start 1) n))
|
||||
(:else false))))
|
||||
|
||||
(define
|
||||
knl-num-int-tail?
|
||||
(fn
|
||||
(s i n)
|
||||
(cond
|
||||
((>= i n) true)
|
||||
((lex-digit? (substring s i (+ i 1)))
|
||||
(knl-num-int-tail? s (+ i 1) n))
|
||||
((= (substring s i (+ i 1)) ".")
|
||||
(knl-num-need-digits? s (+ i 1) n true))
|
||||
((or (= (substring s i (+ i 1)) "e") (= (substring s i (+ i 1)) "E"))
|
||||
(knl-num-exp-sign? s (+ i 1) n))
|
||||
(:else false))))
|
||||
|
||||
(define
|
||||
knl-num-need-digits?
|
||||
(fn
|
||||
(s i n had-int)
|
||||
(cond
|
||||
((>= i n) had-int)
|
||||
((lex-digit? (substring s i (+ i 1)))
|
||||
(knl-num-frac-tail? s (+ i 1) n))
|
||||
(:else false))))
|
||||
|
||||
(define
|
||||
knl-num-frac-tail?
|
||||
(fn
|
||||
(s i n)
|
||||
(cond
|
||||
((>= i n) true)
|
||||
((lex-digit? (substring s i (+ i 1)))
|
||||
(knl-num-frac-tail? s (+ i 1) n))
|
||||
((or (= (substring s i (+ i 1)) "e") (= (substring s i (+ i 1)) "E"))
|
||||
(knl-num-exp-sign? s (+ i 1) n))
|
||||
(:else false))))
|
||||
|
||||
(define
|
||||
knl-num-exp-sign?
|
||||
(fn
|
||||
(s i n)
|
||||
(cond
|
||||
((>= i n) false)
|
||||
((or (= (substring s i (+ i 1)) "+") (= (substring s i (+ i 1)) "-"))
|
||||
(knl-num-exp-digits? s (+ i 1) n false))
|
||||
(:else (knl-num-exp-digits? s i n false)))))
|
||||
|
||||
(define
|
||||
knl-num-exp-digits?
|
||||
(fn
|
||||
(s i n had)
|
||||
(cond
|
||||
((>= i n) had)
|
||||
((lex-digit? (substring s i (+ i 1)))
|
||||
(knl-num-exp-digits? s (+ i 1) n true))
|
||||
(:else false))))
|
||||
|
||||
;; Reader: a closure over (src, pos). Exposes :read-form and :read-all.
|
||||
(define
|
||||
knl-make-reader
|
||||
(fn
|
||||
(src)
|
||||
(let
|
||||
((pos 0) (n (string-length src)))
|
||||
(define
|
||||
at
|
||||
(fn () (if (< pos n) (substring src pos (+ pos 1)) nil)))
|
||||
(define adv (fn () (set! pos (+ pos 1))))
|
||||
(define
|
||||
skip-line
|
||||
(fn () (when (and (at) (not (= (at) "\n"))) (adv) (skip-line))))
|
||||
(define
|
||||
skip-ws
|
||||
(fn
|
||||
()
|
||||
(cond
|
||||
((nil? (at)) nil)
|
||||
((lex-whitespace? (at)) (do (adv) (skip-ws)))
|
||||
((= (at) ";") (do (adv) (skip-line) (skip-ws)))
|
||||
(:else nil))))
|
||||
(define
|
||||
read-string-body
|
||||
(fn
|
||||
(acc)
|
||||
(cond
|
||||
((nil? (at)) (error "kernel-parse: unterminated string"))
|
||||
((= (at) "\"") (do (adv) acc))
|
||||
((= (at) "\\")
|
||||
(do
|
||||
(adv)
|
||||
(let
|
||||
((c (at)))
|
||||
(when (nil? c) (error "kernel-parse: trailing backslash"))
|
||||
(adv)
|
||||
(read-string-body
|
||||
(str
|
||||
acc
|
||||
(cond
|
||||
((= c "n") "\n")
|
||||
((= c "t") "\t")
|
||||
((= c "r") "\r")
|
||||
((= c "\"") "\"")
|
||||
((= c "\\") "\\")
|
||||
(:else c)))))))
|
||||
(:else
|
||||
(let ((c (at))) (adv) (read-string-body (str acc c)))))))
|
||||
(define
|
||||
read-atom-body
|
||||
(fn
|
||||
(acc)
|
||||
(cond
|
||||
((knl-delim? (at)) acc)
|
||||
(:else (let ((c (at))) (adv) (read-atom-body (str acc c)))))))
|
||||
(define
|
||||
classify-atom
|
||||
(fn
|
||||
(s)
|
||||
(cond
|
||||
((= s "#t") true)
|
||||
((= s "#f") false)
|
||||
((knl-numeric? s) (string->number s))
|
||||
(:else s))))
|
||||
(define
|
||||
read-form
|
||||
(fn
|
||||
()
|
||||
(skip-ws)
|
||||
(cond
|
||||
((nil? (at)) :knl-eof)
|
||||
((= (at) ")") (error "kernel-parse: unexpected ')'"))
|
||||
((= (at) "(") (do (adv) (read-list (list))))
|
||||
((= (at) "\"")
|
||||
(do (adv) (kernel-string-make (read-string-body ""))))
|
||||
(:else (classify-atom (read-atom-body ""))))))
|
||||
(define
|
||||
read-list
|
||||
(fn
|
||||
(acc)
|
||||
(skip-ws)
|
||||
(cond
|
||||
((nil? (at)) (error "kernel-parse: unterminated list"))
|
||||
((= (at) ")") (do (adv) acc))
|
||||
(:else (read-list (append acc (list (read-form))))))))
|
||||
(define
|
||||
read-all
|
||||
(fn
|
||||
(acc)
|
||||
(skip-ws)
|
||||
(if (nil? (at)) acc (read-all (append acc (list (read-form)))))))
|
||||
{:read-form read-form :read-all read-all})))
|
||||
|
||||
(define
|
||||
kernel-parse-all
|
||||
(fn (src) ((get (knl-make-reader src) :read-all) (list))))
|
||||
|
||||
(define
|
||||
kernel-parse
|
||||
(fn
|
||||
(src)
|
||||
(let
|
||||
((r (knl-make-reader src)))
|
||||
(let
|
||||
((form ((get r :read-form))))
|
||||
(cond
|
||||
((= form :knl-eof) (error "kernel-parse: empty input"))
|
||||
(:else
|
||||
(let
|
||||
((next ((get r :read-form))))
|
||||
(if
|
||||
(= next :knl-eof)
|
||||
form
|
||||
(error "kernel-parse: trailing input after first form")))))))))
|
||||
Reference in New Issue
Block a user