From bcf6057ac5386ba6fbb0d834d9f34896257f3dae Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 25 Apr 2026 18:15:07 +0000 Subject: [PATCH] common-lisp: Phase 1 reader + 62 tests (141 total) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit lib/common-lisp/parser.sx — cl-read/cl-read-all: lists, dotted pairs (a . b) → cons dict, quote/backquote/unquote/splice as wrapper lists, #' → FUNCTION, #(…) → vector dict, #:foo → uninterned dict, NIL→nil, T→true, integer radix conversion (#xFF/#b1010/#o17). Floats/ratios kept as annotated dicts. lib/common-lisp/tests/parse.sx — 62 tests, all green. Co-Authored-By: Claude Sonnet 4.6 --- lib/common-lisp/parser.sx | 259 +++++++++++++++++++++++++++++++++ lib/common-lisp/test.sh | 2 + lib/common-lisp/tests/parse.sx | 123 ++++++++++++++++ plans/common-lisp-on-sx.md | 3 +- 4 files changed, 386 insertions(+), 1 deletion(-) create mode 100644 lib/common-lisp/parser.sx create mode 100644 lib/common-lisp/tests/parse.sx diff --git a/lib/common-lisp/parser.sx b/lib/common-lisp/parser.sx new file mode 100644 index 00000000..b34867fa --- /dev/null +++ b/lib/common-lisp/parser.sx @@ -0,0 +1,259 @@ +;; Common Lisp reader — converts token stream to CL AST forms. +;; +;; Depends on: lib/common-lisp/reader.sx (cl-tokenize) +;; +;; AST representation: +;; integer/float → SX number (or {:cl-type "float"/:ratio ...}) +;; string → SX string +;; symbol FOO → SX string "FOO" (upcase) +;; symbol NIL → nil +;; symbol T → true +;; :keyword → {:cl-type "keyword" :name "FOO"} +;; #\char → {:cl-type "char" :value "a"} +;; #:uninterned → {:cl-type "uninterned" :name "FOO"} +;; ratio 1/3 → {:cl-type "ratio" :value "1/3"} +;; float 3.14 → {:cl-type "float" :value "3.14"} +;; proper list (a b c) → SX list (a b c) +;; dotted pair (a . b) → {:cl-type "cons" :car a :cdr b} +;; vector #(a b) → {:cl-type "vector" :elements (list a b)} +;; 'x → ("QUOTE" x) +;; `x → ("QUASIQUOTE" x) +;; ,x → ("UNQUOTE" x) +;; ,@x → ("UNQUOTE-SPLICING" x) +;; #'x → ("FUNCTION" x) +;; +;; Public API: +;; (cl-read src) — parse first form from string, return form +;; (cl-read-all src) — parse all top-level forms, return list + +;; ── number conversion ───────────────────────────────────────────── + +(define + cl-hex-val + (fn + (c) + (let + ((o (cl-ord c))) + (cond + ((and (>= o 48) (<= o 57)) (- o 48)) + ((and (>= o 65) (<= o 70)) (+ 10 (- o 65))) + ((and (>= o 97) (<= o 102)) (+ 10 (- o 97))) + (:else 0))))) + +(define + cl-parse-radix-str + (fn + (s radix start) + (let + ((n (string-length s)) (i start) (acc 0)) + (define + loop + (fn + () + (when + (< i n) + (do + (set! acc (+ (* acc radix) (cl-hex-val (substring s i (+ i 1))))) + (set! i (+ i 1)) + (loop))))) + (loop) + acc))) + +(define + cl-convert-integer + (fn + (s) + (let + ((n (string-length s)) (neg false)) + (cond + ((and (> n 2) (= (substring s 0 1) "#")) + (let + ((letter (downcase (substring s 1 2)))) + (cond + ((= letter "x") (cl-parse-radix-str s 16 2)) + ((= letter "b") (cl-parse-radix-str s 2 2)) + ((= letter "o") (cl-parse-radix-str s 8 2)) + (:else (parse-int s 0))))) + (:else (parse-int s 0)))))) + +;; ── reader ──────────────────────────────────────────────────────── + +;; Read one form from token list. +;; Returns {:form F :rest remaining-toks} or {:form nil :rest toks :eof true} +(define + cl-read-form + (fn + (toks) + (if + (not toks) + {:form nil :rest toks :eof true} + (let + ((tok (nth toks 0)) (nxt (rest toks))) + (let + ((type (get tok "type")) (val (get tok "value"))) + (cond + ((= type "eof") {:form nil :rest toks :eof true}) + ((= type "integer") {:form (cl-convert-integer val) :rest nxt}) + ((= type "float") {:form {:cl-type "float" :value val} :rest nxt}) + ((= type "ratio") {:form {:cl-type "ratio" :value val} :rest nxt}) + ((= type "string") {:form val :rest nxt}) + ((= type "char") {:form {:cl-type "char" :value val} :rest nxt}) + ((= type "keyword") {:form {:cl-type "keyword" :name val} :rest nxt}) + ((= type "uninterned") {:form {:cl-type "uninterned" :name val} :rest nxt}) + ((= type "symbol") + (cond + ((= val "NIL") {:form nil :rest nxt}) + ((= val "T") {:form true :rest nxt}) + (:else {:form val :rest nxt}))) + ;; list forms + ((= type "lparen") (cl-read-list nxt)) + ((= type "hash-paren") (cl-read-vector nxt)) + ;; reader macros that wrap the next form + ((= type "quote") (cl-read-wrap "QUOTE" nxt)) + ((= type "backquote") (cl-read-wrap "QUASIQUOTE" nxt)) + ((= type "comma") (cl-read-wrap "UNQUOTE" nxt)) + ((= type "comma-at") (cl-read-wrap "UNQUOTE-SPLICING" nxt)) + ((= type "hash-quote") (cl-read-wrap "FUNCTION" nxt)) + ;; skip unrecognised tokens + (:else (cl-read-form nxt)))))))) + +;; Wrap next form in a list: (name form) +(define + cl-read-wrap + (fn + (name toks) + (let + ((inner (cl-read-form toks))) + {:form (list name (get inner "form")) :rest (get inner "rest")}))) + +;; Read list forms until ')'; handles dotted pair (a . b) +;; Called after consuming '(' +(define + cl-read-list + (fn + (toks) + (let + ((result (cl-read-list-items toks (list)))) + {:form (get result "items") :rest (get result "rest")}))) + +(define + cl-read-list-items + (fn + (toks acc) + (if + (not toks) + {:items acc :rest toks} + (let + ((tok (nth toks 0))) + (let + ((type (get tok "type"))) + (cond + ((= type "eof") {:items acc :rest toks}) + ((= type "rparen") {:items acc :rest (rest toks)}) + ;; dotted pair: read one more form then expect ')' + ((= type "dot") + (let + ((cdr-result (cl-read-form (rest toks)))) + (let + ((cdr-form (get cdr-result "form")) + (after-cdr (get cdr-result "rest"))) + ;; skip the closing ')' + (let + ((close (if after-cdr (nth after-cdr 0) nil))) + (let + ((remaining + (if + (and close (= (get close "type") "rparen")) + (rest after-cdr) + after-cdr))) + ;; build dotted structure + (let + ((dotted (cl-build-dotted acc cdr-form))) + {:items dotted :rest remaining})))))) + (:else + (let + ((item (cl-read-form toks))) + (cl-read-list-items + (get item "rest") + (concat acc (list (get item "form")))))))))))) + +;; Build dotted form: (a b . c) → ((DOTTED a b) . c) style +;; In CL (a b c . d) means a proper dotted structure. +;; We represent it as {:cl-type "cons" :car a :cdr (list->dotted b c d)} +(define + cl-build-dotted + (fn + (head-items tail) + (if + (= (len head-items) 0) + tail + (if + (= (len head-items) 1) + {:cl-type "cons" :car (nth head-items 0) :cdr tail} + (let + ((last-item (nth head-items (- (len head-items) 1))) + (but-last (slice head-items 0 (- (len head-items) 1)))) + {:cl-type "cons" + :car (cl-build-dotted but-last (list last-item)) + :cdr tail}))))) + +;; Read vector #(…) elements until ')' +(define + cl-read-vector + (fn + (toks) + (let + ((result (cl-read-vector-items toks (list)))) + {:form {:cl-type "vector" :elements (get result "items")} :rest (get result "rest")}))) + +(define + cl-read-vector-items + (fn + (toks acc) + (if + (not toks) + {:items acc :rest toks} + (let + ((tok (nth toks 0))) + (let + ((type (get tok "type"))) + (cond + ((= type "eof") {:items acc :rest toks}) + ((= type "rparen") {:items acc :rest (rest toks)}) + (:else + (let + ((item (cl-read-form toks))) + (cl-read-vector-items + (get item "rest") + (concat acc (list (get item "form")))))))))))) + +;; ── public API ──────────────────────────────────────────────────── + +(define + cl-read + (fn + (src) + (let + ((toks (cl-tokenize src))) + (get (cl-read-form toks) "form")))) + +(define + cl-read-all + (fn + (src) + (let + ((toks (cl-tokenize src))) + (define + loop + (fn + (toks acc) + (if + (or (not toks) (= (get (nth toks 0) "type") "eof")) + acc + (let + ((result (cl-read-form toks))) + (if + (get result "eof") + acc + (loop (get result "rest") (concat acc (list (get result "form"))))))))) + (loop toks (list))))) diff --git a/lib/common-lisp/test.sh b/lib/common-lisp/test.sh index ace7d3eb..89fc7eb0 100755 --- a/lib/common-lisp/test.sh +++ b/lib/common-lisp/test.sh @@ -43,6 +43,7 @@ for FILE in "${FILES[@]}"; do cat > "$TMPFILE" < "$TMPFILE2" <