Files
rose-ash/lib/common-lisp/parser.sx
giles bcf6057ac5
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Has been cancelled
common-lisp: Phase 1 reader + 62 tests (141 total)
lib/common-lisp/parser.sx — cl-read/cl-read-all: lists, dotted
pairs (a . b) → cons dict, quote/backquote/unquote/splice as
wrapper lists, #' → FUNCTION, #(…) → vector dict, #:foo →
uninterned dict, NIL→nil, T→true, integer radix conversion
(#xFF/#b1010/#o17). Floats/ratios kept as annotated dicts.

lib/common-lisp/tests/parse.sx — 62 tests, all green.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-25 18:15:07 +00:00

260 lines
8.4 KiB
Plaintext

;; Common Lisp reader — converts token stream to CL AST forms.
;;
;; Depends on: lib/common-lisp/reader.sx (cl-tokenize)
;;
;; AST representation:
;; integer/float → SX number (or {:cl-type "float"/:ratio ...})
;; string → SX string
;; symbol FOO → SX string "FOO" (upcase)
;; symbol NIL → nil
;; symbol T → true
;; :keyword → {:cl-type "keyword" :name "FOO"}
;; #\char → {:cl-type "char" :value "a"}
;; #:uninterned → {:cl-type "uninterned" :name "FOO"}
;; ratio 1/3 → {:cl-type "ratio" :value "1/3"}
;; float 3.14 → {:cl-type "float" :value "3.14"}
;; proper list (a b c) → SX list (a b c)
;; dotted pair (a . b) → {:cl-type "cons" :car a :cdr b}
;; vector #(a b) → {:cl-type "vector" :elements (list a b)}
;; 'x → ("QUOTE" x)
;; `x → ("QUASIQUOTE" x)
;; ,x → ("UNQUOTE" x)
;; ,@x → ("UNQUOTE-SPLICING" x)
;; #'x → ("FUNCTION" x)
;;
;; Public API:
;; (cl-read src) — parse first form from string, return form
;; (cl-read-all src) — parse all top-level forms, return list
;; ── number conversion ─────────────────────────────────────────────
(define
cl-hex-val
(fn
(c)
(let
((o (cl-ord c)))
(cond
((and (>= o 48) (<= o 57)) (- o 48))
((and (>= o 65) (<= o 70)) (+ 10 (- o 65)))
((and (>= o 97) (<= o 102)) (+ 10 (- o 97)))
(:else 0)))))
(define
cl-parse-radix-str
(fn
(s radix start)
(let
((n (string-length s)) (i start) (acc 0))
(define
loop
(fn
()
(when
(< i n)
(do
(set! acc (+ (* acc radix) (cl-hex-val (substring s i (+ i 1)))))
(set! i (+ i 1))
(loop)))))
(loop)
acc)))
(define
cl-convert-integer
(fn
(s)
(let
((n (string-length s)) (neg false))
(cond
((and (> n 2) (= (substring s 0 1) "#"))
(let
((letter (downcase (substring s 1 2))))
(cond
((= letter "x") (cl-parse-radix-str s 16 2))
((= letter "b") (cl-parse-radix-str s 2 2))
((= letter "o") (cl-parse-radix-str s 8 2))
(:else (parse-int s 0)))))
(:else (parse-int s 0))))))
;; ── reader ────────────────────────────────────────────────────────
;; Read one form from token list.
;; Returns {:form F :rest remaining-toks} or {:form nil :rest toks :eof true}
(define
cl-read-form
(fn
(toks)
(if
(not toks)
{:form nil :rest toks :eof true}
(let
((tok (nth toks 0)) (nxt (rest toks)))
(let
((type (get tok "type")) (val (get tok "value")))
(cond
((= type "eof") {:form nil :rest toks :eof true})
((= type "integer") {:form (cl-convert-integer val) :rest nxt})
((= type "float") {:form {:cl-type "float" :value val} :rest nxt})
((= type "ratio") {:form {:cl-type "ratio" :value val} :rest nxt})
((= type "string") {:form val :rest nxt})
((= type "char") {:form {:cl-type "char" :value val} :rest nxt})
((= type "keyword") {:form {:cl-type "keyword" :name val} :rest nxt})
((= type "uninterned") {:form {:cl-type "uninterned" :name val} :rest nxt})
((= type "symbol")
(cond
((= val "NIL") {:form nil :rest nxt})
((= val "T") {:form true :rest nxt})
(:else {:form val :rest nxt})))
;; list forms
((= type "lparen") (cl-read-list nxt))
((= type "hash-paren") (cl-read-vector nxt))
;; reader macros that wrap the next form
((= type "quote") (cl-read-wrap "QUOTE" nxt))
((= type "backquote") (cl-read-wrap "QUASIQUOTE" nxt))
((= type "comma") (cl-read-wrap "UNQUOTE" nxt))
((= type "comma-at") (cl-read-wrap "UNQUOTE-SPLICING" nxt))
((= type "hash-quote") (cl-read-wrap "FUNCTION" nxt))
;; skip unrecognised tokens
(:else (cl-read-form nxt))))))))
;; Wrap next form in a list: (name form)
(define
cl-read-wrap
(fn
(name toks)
(let
((inner (cl-read-form toks)))
{:form (list name (get inner "form")) :rest (get inner "rest")})))
;; Read list forms until ')'; handles dotted pair (a . b)
;; Called after consuming '('
(define
cl-read-list
(fn
(toks)
(let
((result (cl-read-list-items toks (list))))
{:form (get result "items") :rest (get result "rest")})))
(define
cl-read-list-items
(fn
(toks acc)
(if
(not toks)
{:items acc :rest toks}
(let
((tok (nth toks 0)))
(let
((type (get tok "type")))
(cond
((= type "eof") {:items acc :rest toks})
((= type "rparen") {:items acc :rest (rest toks)})
;; dotted pair: read one more form then expect ')'
((= type "dot")
(let
((cdr-result (cl-read-form (rest toks))))
(let
((cdr-form (get cdr-result "form"))
(after-cdr (get cdr-result "rest")))
;; skip the closing ')'
(let
((close (if after-cdr (nth after-cdr 0) nil)))
(let
((remaining
(if
(and close (= (get close "type") "rparen"))
(rest after-cdr)
after-cdr)))
;; build dotted structure
(let
((dotted (cl-build-dotted acc cdr-form)))
{:items dotted :rest remaining}))))))
(:else
(let
((item (cl-read-form toks)))
(cl-read-list-items
(get item "rest")
(concat acc (list (get item "form"))))))))))))
;; Build dotted form: (a b . c) → ((DOTTED a b) . c) style
;; In CL (a b c . d) means a proper dotted structure.
;; We represent it as {:cl-type "cons" :car a :cdr (list->dotted b c d)}
(define
cl-build-dotted
(fn
(head-items tail)
(if
(= (len head-items) 0)
tail
(if
(= (len head-items) 1)
{:cl-type "cons" :car (nth head-items 0) :cdr tail}
(let
((last-item (nth head-items (- (len head-items) 1)))
(but-last (slice head-items 0 (- (len head-items) 1))))
{:cl-type "cons"
:car (cl-build-dotted but-last (list last-item))
:cdr tail})))))
;; Read vector #(…) elements until ')'
(define
cl-read-vector
(fn
(toks)
(let
((result (cl-read-vector-items toks (list))))
{:form {:cl-type "vector" :elements (get result "items")} :rest (get result "rest")})))
(define
cl-read-vector-items
(fn
(toks acc)
(if
(not toks)
{:items acc :rest toks}
(let
((tok (nth toks 0)))
(let
((type (get tok "type")))
(cond
((= type "eof") {:items acc :rest toks})
((= type "rparen") {:items acc :rest (rest toks)})
(:else
(let
((item (cl-read-form toks)))
(cl-read-vector-items
(get item "rest")
(concat acc (list (get item "form"))))))))))))
;; ── public API ────────────────────────────────────────────────────
(define
cl-read
(fn
(src)
(let
((toks (cl-tokenize src)))
(get (cl-read-form toks) "form"))))
(define
cl-read-all
(fn
(src)
(let
((toks (cl-tokenize src)))
(define
loop
(fn
(toks acc)
(if
(or (not toks) (= (get (nth toks 0) "type") "eof"))
acc
(let
((result (cl-read-form toks)))
(if
(get result "eof")
acc
(loop (get result "rest") (concat acc (list (get result "form")))))))))
(loop toks (list)))))