;; Common Lisp reader — converts token stream to CL AST forms. ;; ;; Depends on: lib/common-lisp/reader.sx (cl-tokenize) ;; ;; AST representation: ;; integer/float → SX number (or {:cl-type "float"/:ratio ...}) ;; string → SX string ;; symbol FOO → SX string "FOO" (upcase) ;; symbol NIL → nil ;; symbol T → true ;; :keyword → {:cl-type "keyword" :name "FOO"} ;; #\char → {:cl-type "char" :value "a"} ;; #:uninterned → {:cl-type "uninterned" :name "FOO"} ;; ratio 1/3 → {:cl-type "ratio" :value "1/3"} ;; float 3.14 → {:cl-type "float" :value "3.14"} ;; proper list (a b c) → SX list (a b c) ;; dotted pair (a . b) → {:cl-type "cons" :car a :cdr b} ;; vector #(a b) → {:cl-type "vector" :elements (list a b)} ;; 'x → ("QUOTE" x) ;; `x → ("QUASIQUOTE" x) ;; ,x → ("UNQUOTE" x) ;; ,@x → ("UNQUOTE-SPLICING" x) ;; #'x → ("FUNCTION" x) ;; ;; Public API: ;; (cl-read src) — parse first form from string, return form ;; (cl-read-all src) — parse all top-level forms, return list ;; ── number conversion ───────────────────────────────────────────── (define cl-hex-val (fn (c) (let ((o (cl-ord c))) (cond ((and (>= o 48) (<= o 57)) (- o 48)) ((and (>= o 65) (<= o 70)) (+ 10 (- o 65))) ((and (>= o 97) (<= o 102)) (+ 10 (- o 97))) (:else 0))))) (define cl-parse-radix-str (fn (s radix start) (let ((n (string-length s)) (i start) (acc 0)) (define loop (fn () (when (< i n) (do (set! acc (+ (* acc radix) (cl-hex-val (substring s i (+ i 1))))) (set! i (+ i 1)) (loop))))) (loop) acc))) (define cl-convert-integer (fn (s) (let ((n (string-length s)) (neg false)) (cond ((and (> n 2) (= (substring s 0 1) "#")) (let ((letter (downcase (substring s 1 2)))) (cond ((= letter "x") (cl-parse-radix-str s 16 2)) ((= letter "b") (cl-parse-radix-str s 2 2)) ((= letter "o") (cl-parse-radix-str s 8 2)) (:else (parse-int s 0))))) (:else (parse-int s 0)))))) ;; ── reader ──────────────────────────────────────────────────────── ;; Read one form from token list. ;; Returns {:form F :rest remaining-toks} or {:form nil :rest toks :eof true} (define cl-read-form (fn (toks) (if (not toks) {:form nil :rest toks :eof true} (let ((tok (nth toks 0)) (nxt (rest toks))) (let ((type (get tok "type")) (val (get tok "value"))) (cond ((= type "eof") {:form nil :rest toks :eof true}) ((= type "integer") {:form (cl-convert-integer val) :rest nxt}) ((= type "float") {:form {:cl-type "float" :value val} :rest nxt}) ((= type "ratio") {:form {:cl-type "ratio" :value val} :rest nxt}) ((= type "string") {:form val :rest nxt}) ((= type "char") {:form {:cl-type "char" :value val} :rest nxt}) ((= type "keyword") {:form {:cl-type "keyword" :name val} :rest nxt}) ((= type "uninterned") {:form {:cl-type "uninterned" :name val} :rest nxt}) ((= type "symbol") (cond ((= val "NIL") {:form nil :rest nxt}) ((= val "T") {:form true :rest nxt}) (:else {:form val :rest nxt}))) ;; list forms ((= type "lparen") (cl-read-list nxt)) ((= type "hash-paren") (cl-read-vector nxt)) ;; reader macros that wrap the next form ((= type "quote") (cl-read-wrap "QUOTE" nxt)) ((= type "backquote") (cl-read-wrap "QUASIQUOTE" nxt)) ((= type "comma") (cl-read-wrap "UNQUOTE" nxt)) ((= type "comma-at") (cl-read-wrap "UNQUOTE-SPLICING" nxt)) ((= type "hash-quote") (cl-read-wrap "FUNCTION" nxt)) ;; skip unrecognised tokens (:else (cl-read-form nxt)))))))) ;; Wrap next form in a list: (name form) (define cl-read-wrap (fn (name toks) (let ((inner (cl-read-form toks))) {:form (list name (get inner "form")) :rest (get inner "rest")}))) ;; Read list forms until ')'; handles dotted pair (a . b) ;; Called after consuming '(' (define cl-read-list (fn (toks) (let ((result (cl-read-list-items toks (list)))) {:form (get result "items") :rest (get result "rest")}))) (define cl-read-list-items (fn (toks acc) (if (not toks) {:items acc :rest toks} (let ((tok (nth toks 0))) (let ((type (get tok "type"))) (cond ((= type "eof") {:items acc :rest toks}) ((= type "rparen") {:items acc :rest (rest toks)}) ;; dotted pair: read one more form then expect ')' ((= type "dot") (let ((cdr-result (cl-read-form (rest toks)))) (let ((cdr-form (get cdr-result "form")) (after-cdr (get cdr-result "rest"))) ;; skip the closing ')' (let ((close (if after-cdr (nth after-cdr 0) nil))) (let ((remaining (if (and close (= (get close "type") "rparen")) (rest after-cdr) after-cdr))) ;; build dotted structure (let ((dotted (cl-build-dotted acc cdr-form))) {:items dotted :rest remaining})))))) (:else (let ((item (cl-read-form toks))) (cl-read-list-items (get item "rest") (concat acc (list (get item "form")))))))))))) ;; Build dotted form: (a b . c) → ((DOTTED a b) . c) style ;; In CL (a b c . d) means a proper dotted structure. ;; We represent it as {:cl-type "cons" :car a :cdr (list->dotted b c d)} (define cl-build-dotted (fn (head-items tail) (if (= (len head-items) 0) tail (if (= (len head-items) 1) {:cl-type "cons" :car (nth head-items 0) :cdr tail} (let ((last-item (nth head-items (- (len head-items) 1))) (but-last (slice head-items 0 (- (len head-items) 1)))) {:cl-type "cons" :car (cl-build-dotted but-last (list last-item)) :cdr tail}))))) ;; Read vector #(…) elements until ')' (define cl-read-vector (fn (toks) (let ((result (cl-read-vector-items toks (list)))) {:form {:cl-type "vector" :elements (get result "items")} :rest (get result "rest")}))) (define cl-read-vector-items (fn (toks acc) (if (not toks) {:items acc :rest toks} (let ((tok (nth toks 0))) (let ((type (get tok "type"))) (cond ((= type "eof") {:items acc :rest toks}) ((= type "rparen") {:items acc :rest (rest toks)}) (:else (let ((item (cl-read-form toks))) (cl-read-vector-items (get item "rest") (concat acc (list (get item "form")))))))))))) ;; ── public API ──────────────────────────────────────────────────── (define cl-read (fn (src) (let ((toks (cl-tokenize src))) (get (cl-read-form toks) "form")))) (define cl-read-all (fn (src) (let ((toks (cl-tokenize src))) (define loop (fn (toks acc) (if (or (not toks) (= (get (nth toks 0) "type") "eof")) acc (let ((result (cl-read-form toks))) (if (get result "eof") acc (loop (get result "rest") (concat acc (list (get result "form"))))))))) (loop toks (list)))))