Files
rose-ash/lib/common-lisp/reader.sx
giles 13d0ebcce8
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Has been cancelled
common-lisp: Phase 1 tokenizer + 79 tests
lib/common-lisp/reader.sx — CL tokenizer: symbols with package
qualification (pkg:sym/pkg::sym), integers, floats, ratios, hex/
binary/octal (#xFF/#b1010/#o17), strings with escapes, #\ char
literals (named + bare), reader macros (#' #( #: ,@), line and
nested block comments.

lib/common-lisp/tests/read.sx — 79 tests, all green.
lib/common-lisp/test.sh — test runner (sx_server pipe protocol).

Key SX gotcha: use str not concat for string building.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-25 18:06:30 +00:00

382 lines
12 KiB
Plaintext

;; Common Lisp tokenizer
;;
;; Tokens: {:type T :value V :pos P}
;;
;; Types:
;; "symbol" — FOO, PKG:SYM, PKG::SYM, T, NIL (upcase)
;; "keyword" — :foo (value is upcase name without colon)
;; "integer" — 42, -5, #xFF, #b1010, #o17 (string)
;; "float" — 3.14, 1.0e10 (string)
;; "ratio" — 1/3 (string "N/D")
;; "string" — unescaped content
;; "char" — single-character string
;; "lparen" "rparen" "quote" "backquote" "comma" "comma-at"
;; "hash-quote" — #'
;; "hash-paren" — #(
;; "uninterned" — #:foo (upcase name)
;; "dot" — standalone . (dotted pair separator)
;; "eof"
(define cl-make-tok (fn (type value pos) {:type type :value value :pos pos}))
;; ── char ordinal table ────────────────────────────────────────────
(define
cl-ord-table
(let
((t (dict)) (i 0))
(define
cl-fill
(fn
()
(when
(< i 128)
(do
(dict-set! t (char-from-code i) i)
(set! i (+ i 1))
(cl-fill)))))
(cl-fill)
t))
(define cl-ord (fn (c) (or (get cl-ord-table c) 0)))
;; ── character predicates ──────────────────────────────────────────
(define cl-digit? (fn (c) (and (>= (cl-ord c) 48) (<= (cl-ord c) 57))))
(define
cl-hex?
(fn
(c)
(or
(cl-digit? c)
(and (>= (cl-ord c) 65) (<= (cl-ord c) 70))
(and (>= (cl-ord c) 97) (<= (cl-ord c) 102)))))
(define cl-octal? (fn (c) (and (>= (cl-ord c) 48) (<= (cl-ord c) 55))))
(define cl-binary? (fn (c) (or (= c "0") (= c "1"))))
(define cl-ws? (fn (c) (or (= c " ") (= c "\t") (= c "\n") (= c "\r"))))
(define
cl-alpha?
(fn
(c)
(or
(and (>= (cl-ord c) 65) (<= (cl-ord c) 90))
(and (>= (cl-ord c) 97) (<= (cl-ord c) 122)))))
;; Characters that end a token (whitespace + terminating macro chars)
(define
cl-terminating?
(fn
(c)
(or
(cl-ws? c)
(= c "(")
(= c ")")
(= c "\"")
(= c ";")
(= c "`")
(= c ","))))
;; Symbol constituent: not terminating, not reader-special
(define
cl-sym-char?
(fn
(c)
(not
(or
(cl-terminating? c)
(= c "#")
(= c "|")
(= c "\\")
(= c "'")))))
;; ── named character table ─────────────────────────────────────────
(define
cl-named-chars
{:space " "
:newline "\n"
:tab "\t"
:return "\r"
:backspace (char-from-code 8)
:rubout (char-from-code 127)
:delete (char-from-code 127)
:escape (char-from-code 27)
:altmode (char-from-code 27)
:null (char-from-code 0)
:nul (char-from-code 0)
:page (char-from-code 12)
:formfeed (char-from-code 12)})
;; ── main tokenizer ────────────────────────────────────────────────
(define
cl-tokenize
(fn
(src)
(let
((pos 0) (n (string-length src)) (toks (list)))
(define at (fn () (if (< pos n) (substring src pos (+ pos 1)) nil)))
(define peek1 (fn () (if (< (+ pos 1) n) (substring src (+ pos 1) (+ pos 2)) nil)))
(define adv (fn () (set! pos (+ pos 1))))
;; Advance while predicate holds; return substring from start to end
(define
read-while
(fn
(pred)
(let
((start pos))
(define
rw-loop
(fn
()
(when
(and (at) (pred (at)))
(do (adv) (rw-loop)))))
(rw-loop)
(substring src start pos))))
(define
skip-line
(fn
()
(when
(and (at) (not (= (at) "\n")))
(do (adv) (skip-line)))))
(define
skip-block
(fn
(depth)
(when
(at)
(cond
((and (= (at) "#") (= (peek1) "|"))
(do (adv) (adv) (skip-block (+ depth 1))))
((and (= (at) "|") (= (peek1) "#"))
(do
(adv)
(adv)
(when (> depth 1) (skip-block (- depth 1)))))
(:else (do (adv) (skip-block depth)))))))
;; Read string literal — called with pos just past opening "
(define
read-str
(fn
(acc)
(if
(not (at))
acc
(cond
((= (at) "\"") (do (adv) acc))
((= (at) "\\")
(do
(adv)
(let
((e (at)))
(adv)
(read-str
(str
acc
(cond
((= e "n") "\n")
((= e "t") "\t")
((= e "r") "\r")
((= e "\"") "\"")
((= e "\\") "\\")
(:else e)))))))
(:else
(let
((c (at)))
(adv)
(read-str (str acc c))))))))
;; Read #\ char literal — called with pos just past the backslash
(define
read-char-lit
(fn
()
(let
((first (at)))
(adv)
(let
((rest (if (and (at) (cl-alpha? (at))) (read-while cl-alpha?) "")))
(if
(= rest "")
first
(let
((name (downcase (str first rest))))
(or (get cl-named-chars name) first)))))))
;; Number scanner — called with pos just past first digit(s).
;; acc holds what was already consumed (first digit or sign+digit).
(define
scan-num
(fn
(p acc)
(let
((more (read-while cl-digit?)))
(set! acc (str acc more))
(cond
;; ratio N/D
((and (at) (= (at) "/") (peek1) (cl-digit? (peek1)))
(do
(adv)
(let
((denom (read-while cl-digit?)))
{:type "ratio" :value (str acc "/" denom) :pos p})))
;; float: decimal point N.M[eE]
((and (at) (= (at) ".") (peek1) (cl-digit? (peek1)))
(do
(adv)
(let
((frac (read-while cl-digit?)))
(set! acc (str acc "." frac))
(when
(and (at) (or (= (at) "e") (= (at) "E")))
(do
(set! acc (str acc (at)))
(adv)
(when
(and (at) (or (= (at) "+") (= (at) "-")))
(do (set! acc (str acc (at))) (adv)))
(set! acc (str acc (read-while cl-digit?)))))
{:type "float" :value acc :pos p})))
;; float: exponent only NeE
((and (at) (or (= (at) "e") (= (at) "E")))
(do
(set! acc (str acc (at)))
(adv)
(when
(and (at) (or (= (at) "+") (= (at) "-")))
(do (set! acc (str acc (at))) (adv)))
(set! acc (str acc (read-while cl-digit?)))
{:type "float" :value acc :pos p}))
(:else {:type "integer" :value acc :pos p})))))
(define
read-radix
(fn
(letter p)
(let
((pred
(cond
((or (= letter "x") (= letter "X")) cl-hex?)
((or (= letter "b") (= letter "B")) cl-binary?)
((or (= letter "o") (= letter "O")) cl-octal?)
(:else cl-digit?))))
{:type "integer"
:value (str "#" letter (read-while pred))
:pos p})))
(define emit (fn (tok) (append! toks tok)))
(define
scan
(fn
()
(when
(< pos n)
(let
((c (at)) (p pos))
(cond
((cl-ws? c) (do (adv) (scan)))
((= c ";") (do (adv) (skip-line) (scan)))
((= c "(") (do (adv) (emit (cl-make-tok "lparen" "(" p)) (scan)))
((= c ")") (do (adv) (emit (cl-make-tok "rparen" ")" p)) (scan)))
((= c "'") (do (adv) (emit (cl-make-tok "quote" "'" p)) (scan)))
((= c "`") (do (adv) (emit (cl-make-tok "backquote" "`" p)) (scan)))
((= c ",")
(do
(adv)
(if
(= (at) "@")
(do (adv) (emit (cl-make-tok "comma-at" ",@" p)))
(emit (cl-make-tok "comma" "," p)))
(scan)))
((= c "\"")
(do
(adv)
(emit (cl-make-tok "string" (read-str "") p))
(scan)))
;; :keyword
((= c ":")
(do
(adv)
(emit (cl-make-tok "keyword" (upcase (read-while cl-sym-char?)) p))
(scan)))
;; dispatch macro #
((= c "#")
(do
(adv)
(let
((d (at)))
(cond
((= d "'") (do (adv) (emit (cl-make-tok "hash-quote" "#'" p)) (scan)))
((= d "(") (do (adv) (emit (cl-make-tok "hash-paren" "#(" p)) (scan)))
((= d ":")
(do
(adv)
(emit
(cl-make-tok "uninterned" (upcase (read-while cl-sym-char?)) p))
(scan)))
((= d "|") (do (adv) (skip-block 1) (scan)))
((= d "\\")
(do (adv) (emit (cl-make-tok "char" (read-char-lit) p)) (scan)))
((or (= d "x") (= d "X"))
(do (adv) (emit (read-radix d p)) (scan)))
((or (= d "b") (= d "B"))
(do (adv) (emit (read-radix d p)) (scan)))
((or (= d "o") (= d "O"))
(do (adv) (emit (read-radix d p)) (scan)))
(:else (scan))))))
;; standalone dot, float .5, or symbol starting with dots
((= c ".")
(do
(adv)
(cond
((or (not (at)) (cl-terminating? (at)))
(do (emit (cl-make-tok "dot" "." p)) (scan)))
((cl-digit? (at))
(do
(emit
(cl-make-tok "float" (str "0." (read-while cl-digit?)) p))
(scan)))
(:else
(do
(emit
(cl-make-tok "symbol" (upcase (str "." (read-while cl-sym-char?))) p))
(scan))))))
;; sign followed by digit → number
((and (or (= c "+") (= c "-")) (peek1) (cl-digit? (peek1)))
(do
(adv)
(let
((first-d (at)))
(adv)
(emit (scan-num p (str c first-d))))
(scan)))
;; decimal digit → number
((cl-digit? c)
(do
(adv)
(emit (scan-num p c))
(scan)))
;; symbol constituent (includes bare +, -, etc.)
((cl-sym-char? c)
(do
(emit (cl-make-tok "symbol" (upcase (read-while cl-sym-char?)) p))
(scan)))
(:else (do (adv) (scan))))))))
(scan)
(append! toks (cl-make-tok "eof" nil n))
toks)))