common-lisp: Phase 1 reader + 62 tests (141 total)
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Has been cancelled
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Has been cancelled
lib/common-lisp/parser.sx — cl-read/cl-read-all: lists, dotted pairs (a . b) → cons dict, quote/backquote/unquote/splice as wrapper lists, #' → FUNCTION, #(…) → vector dict, #:foo → uninterned dict, NIL→nil, T→true, integer radix conversion (#xFF/#b1010/#o17). Floats/ratios kept as annotated dicts. lib/common-lisp/tests/parse.sx — 62 tests, all green. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
259
lib/common-lisp/parser.sx
Normal file
259
lib/common-lisp/parser.sx
Normal file
@@ -0,0 +1,259 @@
|
||||
;; Common Lisp reader — converts token stream to CL AST forms.
|
||||
;;
|
||||
;; Depends on: lib/common-lisp/reader.sx (cl-tokenize)
|
||||
;;
|
||||
;; AST representation:
|
||||
;;   integer       → SX number (floats and ratios are kept as annotated dicts, see below)
|
||||
;; string → SX string
|
||||
;; symbol FOO → SX string "FOO" (upcase)
|
||||
;; symbol NIL → nil
|
||||
;; symbol T → true
|
||||
;; :keyword → {:cl-type "keyword" :name "FOO"}
|
||||
;; #\char → {:cl-type "char" :value "a"}
|
||||
;; #:uninterned → {:cl-type "uninterned" :name "FOO"}
|
||||
;; ratio 1/3 → {:cl-type "ratio" :value "1/3"}
|
||||
;; float 3.14 → {:cl-type "float" :value "3.14"}
|
||||
;; proper list (a b c) → SX list (a b c)
|
||||
;; dotted pair (a . b) → {:cl-type "cons" :car a :cdr b}
|
||||
;; vector #(a b) → {:cl-type "vector" :elements (list a b)}
|
||||
;; 'x → ("QUOTE" x)
|
||||
;; `x → ("QUASIQUOTE" x)
|
||||
;; ,x → ("UNQUOTE" x)
|
||||
;; ,@x → ("UNQUOTE-SPLICING" x)
|
||||
;; #'x → ("FUNCTION" x)
|
||||
;;
|
||||
;; Public API:
|
||||
;; (cl-read src) — parse first form from string, return form
|
||||
;; (cl-read-all src) — parse all top-level forms, return list
|
||||
|
||||
;; ── number conversion ─────────────────────────────────────────────
|
||||
|
||||
;; Numeric value of a single digit character.
;;   c — one-character string; its code is obtained via cl-ord.
;; Returns 0-9 for "0"-"9", 10-15 for "A"-"F" / "a"-"f", and 0 for anything else.
(define cl-hex-val
  (fn (c)
    (let ((code (cl-ord c)))
      (cond
        ;; "0".."9" are codes 48..57
        ((and (> code 47) (< code 58)) (- code 48))
        ;; "A".."F" are codes 65..70, so code - 55 gives 10..15
        ((and (> code 64) (< code 71)) (- code 55))
        ;; "a".."f" are codes 97..102, so code - 87 gives 10..15
        ((and (> code 96) (< code 103)) (- code 87))
        (:else 0)))))
|
||||
|
||||
;; Accumulate the digits of s, from index start to the end, in the given radix.
;;   s      — source string (e.g. "#xFF")
;;   radix  — numeric base used as the multiplier
;;   start  — index of the first digit character
;; Returns the accumulated number, or 0 when start is at or past the end of s.
;; Digits are mapped with cl-hex-val, so unrecognised characters count as 0.
(define cl-parse-radix-str
  (fn (s radix start)
    (let ((limit (string-length s)))
      (define step
        (fn (pos total)
          (if (< pos limit)
            (step
              (+ pos 1)
              (+ (* total radix) (cl-hex-val (substring s pos (+ pos 1)))))
            total)))
      (step start 0))))
|
||||
|
||||
;; Convert an integer token's text to a number.
;;   s — token text: plain decimal ("42", "-5", "+3") or radix-prefixed
;;       ("#xFF", "#b1010", "#o17"; the prefix letter is case-insensitive).
;; A sign directly after the radix prefix is honoured ("#x-FF" → -255);
;; previously the sign character was treated as a zero digit and the result
;; came out positive.
;; Anything without a recognised radix prefix is handed to parse-int.
(define cl-convert-integer
  (fn (s)
    (let ((n (string-length s)))
      (if (and (> n 2) (= (substring s 0 1) "#"))
        (let ((letter (downcase (substring s 1 2)))
              (sign (substring s 2 3)))
          (let ((radix
                  (cond
                    ((= letter "x") 16)
                    ((= letter "b") 2)
                    ((= letter "o") 8)
                    (:else 0)))
                ;; skip an explicit sign so it is not read as a digit
                (start (if (or (= sign "-") (= sign "+")) 3 2)))
            (if (= radix 0)
              (parse-int s 0)
              (let ((magnitude (cl-parse-radix-str s radix start)))
                (if (= sign "-") (- 0 magnitude) magnitude)))))
        (parse-int s 0)))))
|
||||
|
||||
;; ── reader ────────────────────────────────────────────────────────
|
||||
|
||||
;; Read one form from token list.
|
||||
;; Returns {:form F :rest remaining-toks} or {:form nil :rest toks :eof true}
|
||||
;; Read a single form from the front of a token list.
;;   toks — list of token dicts with "type" and "value" entries.
;; Returns {:form F :rest remaining}; at end of input (no tokens, or an "eof"
;; token) returns {:form nil :rest toks :eof true} without consuming anything.
(define cl-read-form
  (fn (toks)
    (if (not toks)
      {:form nil :rest toks :eof true}
      (let ((head (nth toks 0)) (tail (rest toks)))
        (let ((kind (get head "type")) (text (get head "value")))
          ;; result for a form that consumed exactly one token
          (define emit (fn (form) {:form form :rest tail}))
          (cond
            ((= kind "eof") {:form nil :rest toks :eof true})
            ;; atoms
            ((= kind "integer") (emit (cl-convert-integer text)))
            ((= kind "float") (emit {:cl-type "float" :value text}))
            ((= kind "ratio") (emit {:cl-type "ratio" :value text}))
            ((= kind "string") (emit text))
            ((= kind "char") (emit {:cl-type "char" :value text}))
            ((= kind "keyword") (emit {:cl-type "keyword" :name text}))
            ((= kind "uninterned") (emit {:cl-type "uninterned" :name text}))
            ;; NIL and T become nil / true; every other symbol stays a string
            ((= kind "symbol")
              (emit (if (= text "NIL") nil (if (= text "T") true text))))
            ;; compound forms: the opening token is already consumed
            ((= kind "lparen") (cl-read-list tail))
            ((= kind "hash-paren") (cl-read-vector tail))
            ;; prefix reader macros wrap the form that follows
            ((= kind "quote") (cl-read-wrap "QUOTE" tail))
            ((= kind "backquote") (cl-read-wrap "QUASIQUOTE" tail))
            ((= kind "comma") (cl-read-wrap "UNQUOTE" tail))
            ((= kind "comma-at") (cl-read-wrap "UNQUOTE-SPLICING" tail))
            ((= kind "hash-quote") (cl-read-wrap "FUNCTION" tail))
            ;; any other token type is dropped and reading continues
            (:else (cl-read-form tail))))))))
|
||||
|
||||
;; Wrap next form in a list: (name form)
|
||||
;; Read the next form and wrap it as the two-element list (name form).
;;   name — wrapper head, e.g. "QUOTE"
;;   toks — tokens following the reader-macro token
;; Returns {:form (name form) :rest remaining}.
(define cl-read-wrap
  (fn (name toks)
    (let ((parsed (cl-read-form toks)))
      (let ((wrapped (list name (get parsed "form")))
            (remaining (get parsed "rest")))
        {:form wrapped :rest remaining}))))
|
||||
|
||||
;; Read list forms until ')'; handles dotted pair (a . b)
|
||||
;; Called after consuming '('
|
||||
;; Read the body of a list whose '(' has already been consumed.
;; Delegates to cl-read-list-items with an empty accumulator and repackages
;; its {:items … :rest …} result as {:form … :rest …}.
(define cl-read-list
  (fn (toks)
    (let ((collected (cl-read-list-items toks (list))))
      (let ((form (get collected "items"))
            (remaining (get collected "rest")))
        {:form form :rest remaining}))))
|
||||
|
||||
;; Collect list elements until the closing ')'.
;;   toks — tokens after the opening '(' (or after the elements read so far)
;;   acc  — elements collected so far
;; Returns {:items ITEMS :rest remaining}. ITEMS is the element list for a
;; proper list, or the structure built by cl-build-dotted when a "dot" token
;; is met. On "eof" (or an exhausted token list) the elements gathered so far
;; are returned and nothing is consumed.
;;
;; After the form that follows a dot, everything up to the matching ')' is
;; consumed. Previously a malformed tail such as (a . b c) left "c" and ")"
;; in the stream, so they were read as stray top-level forms.
(define cl-read-list-items
  (fn (toks acc)
    (if (not toks)
      {:items acc :rest toks}
      (let ((kind (get (nth toks 0) "type")))
        (cond
          ((= kind "eof") {:items acc :rest toks})
          ((= kind "rparen") {:items acc :rest (rest toks)})
          ;; dotted tail: read one form, then discard up to and including ')'
          ((= kind "dot")
            (let ((cdr-result (cl-read-form (rest toks))))
              (define skip-to-close
                (fn (ts)
                  (if (not ts)
                    ts
                    (let ((k (get (nth ts 0) "type")))
                      (cond
                        ((= k "rparen") (rest ts))
                        ((= k "eof") ts)
                        ;; drop a stray dot on its own so cl-read-form cannot
                        ;; swallow the ')' that may follow it
                        ((= k "dot") (skip-to-close (rest ts)))
                        ;; discard one whole extra form, keeping nested parens balanced
                        (:else (skip-to-close (get (cl-read-form ts) "rest"))))))))
              {:items (cl-build-dotted acc (get cdr-result "form"))
               :rest (skip-to-close (get cdr-result "rest"))}))
          (:else
            (let ((item (cl-read-form toks)))
              (cl-read-list-items
                (get item "rest")
                (concat acc (list (get item "form")))))))))))
|
||||
|
||||
;; Build a dotted (improper) list as a right-nested chain of cons dicts.
;;   head-items — the elements read before the dot
;;   tail       — the form read after the dot
;; (a . d)     → {:cl-type "cons" :car a :cdr d}
;; (a b c . d) → {:cl-type "cons" :car a
;;                :cdr {:cl-type "cons" :car b
;;                      :cdr {:cl-type "cons" :car c :cdr d}}}
;; With no head items the tail itself is returned.
;;
;; The earlier version nested on the car side for two or more head items,
;; so (a b . c) came out as ((a . (b)) . c).
(define cl-build-dotted
  (fn (head-items tail)
    (if (= (len head-items) 0)
      tail
      {:cl-type "cons"
       :car (nth head-items 0)
       :cdr (cl-build-dotted (slice head-items 1 (len head-items)) tail)})))
|
||||
|
||||
;; Read vector #(…) elements until ')'
|
||||
;; Read the body of a vector whose "#(" has already been consumed.
;; Returns {:form {:cl-type "vector" :elements ITEMS} :rest remaining}.
(define cl-read-vector
  (fn (toks)
    (let ((collected (cl-read-vector-items toks (list))))
      (let ((elements (get collected "items"))
            (remaining (get collected "rest")))
        {:form {:cl-type "vector" :elements elements} :rest remaining}))))
|
||||
|
||||
;; Collect vector elements until the closing ')'.
;;   toks — tokens after "#(" (or after the elements read so far)
;;   acc  — elements collected so far
;; Returns {:items ITEMS :rest remaining}. On "eof" (or an exhausted token
;; list) the elements gathered so far are returned and nothing is consumed.
(define cl-read-vector-items
  (fn (toks acc)
    (if (not toks)
      {:items acc :rest toks}
      (let ((kind (get (nth toks 0) "type")))
        (if (= kind "eof")
          {:items acc :rest toks}
          (if (= kind "rparen")
            {:items acc :rest (rest toks)}
            (let ((parsed (cl-read-form toks)))
              (cl-read-vector-items
                (get parsed "rest")
                (concat acc (list (get parsed "form")))))))))))
|
||||
|
||||
;; ── public API ────────────────────────────────────────────────────
|
||||
|
||||
;; Parse the first form in a source string.
;;   src — Common Lisp source text
;; Returns the form, which is nil when the input contains no form.
(define cl-read
  (fn (src)
    (let ((first-result (cl-read-form (cl-tokenize src))))
      (get first-result "form"))))
|
||||
|
||||
;; Parse every top-level form in a source string.
;;   src — Common Lisp source text
;; Returns the list of forms in source order (empty when there are none).
(define cl-read-all
  (fn (src)
    (let ((toks (cl-tokenize src)))
      (define collect
        (fn (pending forms)
          (cond
            ;; stop when the tokens are exhausted or the next one is "eof"
            ((not pending) forms)
            ((= (get (nth pending 0) "type") "eof") forms)
            (:else
              (let ((parsed (cl-read-form pending)))
                ;; cl-read-form also reports eof after skipping unrecognised tokens
                (if (get parsed "eof")
                  forms
                  (collect
                    (get parsed "rest")
                    (concat forms (list (get parsed "form"))))))))))
      (collect toks (list)))))
|
||||
@@ -43,6 +43,7 @@ for FILE in "${FILES[@]}"; do
|
||||
cat > "$TMPFILE" <<EPOCHS
|
||||
(epoch 1)
|
||||
(load "lib/common-lisp/reader.sx")
|
||||
(load "lib/common-lisp/parser.sx")
|
||||
(epoch 2)
|
||||
(load "$FILE")
|
||||
(epoch 3)
|
||||
@@ -75,6 +76,7 @@ EPOCHS
|
||||
cat > "$TMPFILE2" <<EPOCHS
|
||||
(epoch 1)
|
||||
(load "lib/common-lisp/reader.sx")
|
||||
(load "lib/common-lisp/parser.sx")
|
||||
(epoch 2)
|
||||
(load "$FILE")
|
||||
(epoch 3)
|
||||
|
||||
123
lib/common-lisp/tests/parse.sx
Normal file
123
lib/common-lisp/tests/parse.sx
Normal file
@@ -0,0 +1,123 @@
|
||||
;; Common Lisp reader/parser tests
|
||||
|
||||
;; Test bookkeeping: pass/fail counters plus a record of each failure.
(define cl-test-pass 0)
(define cl-test-fail 0)
(define cl-test-fails (list))

;; Compare actual with expected using =.
;; A match bumps cl-test-pass; a mismatch bumps cl-test-fail and appends
;; {:name :expected :actual} to cl-test-fails.
(define cl-test
  (fn (name actual expected)
    (if (not (= actual expected))
      (do
        (set! cl-test-fail (+ cl-test-fail 1))
        (append! cl-test-fails {:name name :expected expected :actual actual}))
      (set! cl-test-pass (+ cl-test-pass 1)))))
|
||||
|
||||
;; ── atoms ─────────────────────────────────────────────────────────
|
||||
|
||||
;; integers: plain, signed, and #x / #b / #o radix prefixes
(cl-test "integer: 42" (cl-read "42") 42)
(cl-test "integer: 0" (cl-read "0") 0)
(cl-test "integer: negative" (cl-read "-5") -5)
(cl-test "integer: positive sign" (cl-read "+3") 3)
(cl-test "integer: hex #xFF" (cl-read "#xFF") 255)
(cl-test "integer: hex #xAB" (cl-read "#xAB") 171)
(cl-test "integer: binary #b1010" (cl-read "#b1010") 10)
(cl-test "integer: octal #o17" (cl-read "#o17") 15)

;; floats are kept as annotated dicts carrying the source text
(let ((pi-form (cl-read "3.14")))
  (cl-test "float: type" (get pi-form "cl-type") "float")
  (cl-test "float: value" (get pi-form "value") "3.14"))
(cl-test "float: neg" (get (cl-read "-2.5") "value") "-2.5")
(cl-test "float: exp" (get (cl-read "1.0e10") "value") "1.0e10")

;; ratios are kept as annotated dicts carrying the source text
(let ((third (cl-read "1/3")))
  (cl-test "ratio: type" (get third "cl-type") "ratio")
  (cl-test "ratio: value" (get third "value") "1/3"))
(cl-test "ratio: 22/7" (get (cl-read "22/7") "value") "22/7")

;; strings
(cl-test "string: basic" (cl-read "\"hello\"") "hello")
(cl-test "string: empty" (cl-read "\"\"") "")
(cl-test "string: with escape" (cl-read "\"a\\nb\"") "a\nb")

;; symbols are upcased; package qualifiers are kept in the name
(cl-test "symbol: foo" (cl-read "foo") "FOO")
(cl-test "symbol: BAR" (cl-read "BAR") "BAR")
(cl-test "symbol: pkg:sym" (cl-read "cl:car") "CL:CAR")
(cl-test "symbol: pkg::sym" (cl-read "pkg::foo") "PKG::FOO")

;; NIL and T map to nil and true
(cl-test "nil: symbol" (cl-read "nil") nil)
(cl-test "nil: uppercase" (cl-read "NIL") nil)
(cl-test "t: symbol" (cl-read "t") true)
(cl-test "t: uppercase" (cl-read "T") true)

;; keywords
(let ((kw (cl-read ":foo")))
  (cl-test "keyword: type" (get kw "cl-type") "keyword")
  (cl-test "keyword: name" (get kw "name") "FOO"))
(cl-test "keyword: :test" (get (cl-read ":test") "name") "TEST")

;; characters
(let ((ch (cl-read "#\\a")))
  (cl-test "char: type" (get ch "cl-type") "char")
  (cl-test "char: value" (get ch "value") "a"))
(cl-test "char: Space" (get (cl-read "#\\Space") "value") " ")
(cl-test "char: Newline" (get (cl-read "#\\Newline") "value") "\n")

;; uninterned symbols
(let ((gensym-form (cl-read "#:foo")))
  (cl-test "uninterned: type" (get gensym-form "cl-type") "uninterned")
  (cl-test "uninterned: name" (get gensym-form "name") "FOO"))
|
||||
|
||||
;; ── lists ─────────────────────────────────────────────────────────
|
||||
|
||||
;; proper lists read as SX lists; NIL and T elements become nil / true
(cl-test "list: empty"
  (cl-read "()")
  (list))
(cl-test "list: one element"
  (cl-read "(foo)")
  (list "FOO"))
(cl-test "list: two elements"
  (cl-read "(foo bar)")
  (list "FOO" "BAR"))
(cl-test "list: nested"
  (cl-read "((a b) c)")
  (list (list "A" "B") "C"))
(cl-test "list: with integer"
  (cl-read "(+ 1 2)")
  (list "+" 1 2))
(cl-test "list: with string"
  (cl-read "(print \"hi\")")
  (list "PRINT" "hi"))
(cl-test "list: nil element"
  (cl-read "(a nil b)")
  (list "A" nil "B"))
(cl-test "list: t element"
  (cl-read "(a t b)")
  (list "A" true "B"))
|
||||
|
||||
;; ── dotted pairs ──────────────────────────────────────────────────
|
||||
|
||||
;; (a . b) reads as a cons dict with "car" and "cdr" entries
(let ((pair (cl-read "(a . b)")))
  (cl-test "dotted: type" (get pair "cl-type") "cons")
  (cl-test "dotted: car" (get pair "car") "A")
  (cl-test "dotted: cdr" (get pair "cdr") "B"))
(cl-test "dotted: number cdr" (get (cl-read "(x . 42)") "cdr") 42)
|
||||
|
||||
;; ── reader macros ─────────────────────────────────────────────────
|
||||
|
||||
;; each prefix reader macro wraps the following form as (NAME form)
(cl-test "quote: form"
  (cl-read "'x")
  (list "QUOTE" "X"))
(cl-test "quote: list"
  (cl-read "'(a b)")
  (list "QUOTE" (list "A" "B")))
(cl-test "backquote: form"
  (cl-read "`x")
  (list "QUASIQUOTE" "X"))
(cl-test "unquote: form"
  (cl-read ",x")
  (list "UNQUOTE" "X"))
(cl-test "comma-at: form"
  (cl-read ",@x")
  (list "UNQUOTE-SPLICING" "X"))
(cl-test "function: form"
  (cl-read "#'foo")
  (list "FUNCTION" "FOO"))
|
||||
|
||||
;; ── vector ────────────────────────────────────────────────────────
|
||||
|
||||
;; #(…) reads as a vector dict whose "elements" entry is an SX list
(let ((vec (cl-read "#(1 2 3)")))
  (cl-test "vector: type" (get vec "cl-type") "vector")
  (cl-test "vector: elements" (get vec "elements") (list 1 2 3)))
(cl-test "vector: empty" (get (cl-read "#()") "elements") (list))
(cl-test "vector: mixed" (get (cl-read "#(a 1 \"s\")") "elements") (list "A" 1 "s"))
|
||||
|
||||
;; ── cl-read-all ───────────────────────────────────────────────────
|
||||
|
||||
;; cl-read-all returns every top-level form in source order
(cl-test "read-all: empty" (cl-read-all "") (list))

(cl-test "read-all: two forms" (cl-read-all "42 foo") (list 42 "FOO"))

(cl-test "read-all: three forms"
  (cl-read-all "(+ 1 2) (+ 3 4) hello")
  (list (list "+" 1 2) (list "+" 3 4) "HELLO"))

;; line comments do not produce forms
(cl-test "read-all: with comments"
  (cl-read-all "; this is a comment\n42 ; inline\nfoo")
  (list 42 "FOO"))

(let ((forms (cl-read-all "(defun square (x) (* x x))")))
  (cl-test "read-all: defun form"
    (nth forms 0)
    (list "DEFUN" "SQUARE" (list "X") (list "*" "X" "X"))))
|
||||
@@ -51,7 +51,7 @@ Core mapping:
|
||||
|
||||
### Phase 1 — reader + parser
|
||||
- [x] Tokenizer: symbols (with package qualification `pkg:sym` / `pkg::sym`), numbers (int, float, ratio `1/3`, `#xFF`, `#b1010`, `#o17`), strings `"…"` with `\` escapes, characters `#\Space` `#\Newline` `#\a`, comments `;`, block comments `#| … |#`
|
||||
- [ ] Reader: list, dotted pair, quote `'`, function `#'`, quasiquote `` ` ``, unquote `,`, splice `,@`, vector `#(…)`, uninterned `#:foo`, nil/t literals
|
||||
- [x] Reader: list, dotted pair, quote `'`, function `#'`, quasiquote `` ` ``, unquote `,`, splice `,@`, vector `#(…)`, uninterned `#:foo`, nil/t literals
|
||||
- [ ] Parser: lambda lists with `&optional` `&rest` `&key` `&aux` `&allow-other-keys`, defaults, supplied-p variables
|
||||
- [ ] Unit tests in `lib/common-lisp/tests/read.sx`
|
||||
|
||||
@@ -114,6 +114,7 @@ Core mapping:
|
||||
|
||||
_Newest first._
|
||||
|
||||
- 2026-04-25: Phase 1 reader/parser — 62 new tests, 141 total green. `lib/common-lisp/parser.sx`: cl-read/cl-read-all, lists, dotted pairs, quote/backquote/unquote/splice/#', vectors, #:uninterned, NIL→nil, T→true, reader macro wrappers.
|
||||
- 2026-04-25: Phase 1 tokenizer — 79 tests green. `lib/common-lisp/reader.sx` + `tests/read.sx` + `test.sh`. Handles symbols (pkg:sym, pkg::sym), integers, floats, ratios, hex/binary/octal, strings, #\ chars, reader macros (#' #( #: ,@), line/block comments. Key gotcha: SX `str` for string concat (not `concat`), substring-based read-while.
|
||||
|
||||
## Blockers
|
||||
|
||||
Reference in New Issue
Block a user