common-lisp: Phase 1 tokenizer + 79 tests
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Has been cancelled
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Has been cancelled
lib/common-lisp/reader.sx — CL tokenizer: symbols with package qualification (pkg:sym/pkg::sym), integers, floats, ratios, hex/ binary/octal (#xFF/#b1010/#o17), strings with escapes, #\ char literals (named + bare), reader macros (#' #( #: ,@), line and nested block comments. lib/common-lisp/tests/read.sx — 79 tests, all green. lib/common-lisp/test.sh — test runner (sx_server pipe protocol). Key SX gotcha: use str not concat for string building. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
381
lib/common-lisp/reader.sx
Normal file
381
lib/common-lisp/reader.sx
Normal file
@@ -0,0 +1,381 @@
|
||||
;; Common Lisp tokenizer
|
||||
;;
|
||||
;; Tokens: {:type T :value V :pos P}
|
||||
;;
|
||||
;; Types:
|
||||
;; "symbol" — FOO, PKG:SYM, PKG::SYM, T, NIL (upcase)
|
||||
;; "keyword" — :foo (value is upcase name without colon)
|
||||
;; "integer" — 42, -5, #xFF, #b1010, #o17 (string)
|
||||
;; "float" — 3.14, 1.0e10 (string)
|
||||
;; "ratio" — 1/3 (string "N/D")
|
||||
;; "string" — unescaped content
|
||||
;; "char" — single-character string
|
||||
;; "lparen" "rparen" "quote" "backquote" "comma" "comma-at"
|
||||
;; "hash-quote" — #'
|
||||
;; "hash-paren" — #(
|
||||
;; "uninterned" — #:foo (upcase name)
|
||||
;; "dot" — standalone . (dotted pair separator)
|
||||
;; "eof"
|
||||
|
||||
(define cl-make-tok (fn (type value pos) {:type type :value value :pos pos}))
|
||||
|
||||
;; ── char ordinal table ────────────────────────────────────────────
|
||||
|
||||
(define
|
||||
cl-ord-table
|
||||
(let
|
||||
((t (dict)) (i 0))
|
||||
(define
|
||||
cl-fill
|
||||
(fn
|
||||
()
|
||||
(when
|
||||
(< i 128)
|
||||
(do
|
||||
(dict-set! t (char-from-code i) i)
|
||||
(set! i (+ i 1))
|
||||
(cl-fill)))))
|
||||
(cl-fill)
|
||||
t))
|
||||
|
||||
(define cl-ord (fn (c) (or (get cl-ord-table c) 0)))
|
||||
|
||||
;; ── character predicates ──────────────────────────────────────────
|
||||
|
||||
(define cl-digit? (fn (c) (and (>= (cl-ord c) 48) (<= (cl-ord c) 57))))
|
||||
|
||||
(define
|
||||
cl-hex?
|
||||
(fn
|
||||
(c)
|
||||
(or
|
||||
(cl-digit? c)
|
||||
(and (>= (cl-ord c) 65) (<= (cl-ord c) 70))
|
||||
(and (>= (cl-ord c) 97) (<= (cl-ord c) 102)))))
|
||||
|
||||
(define cl-octal? (fn (c) (and (>= (cl-ord c) 48) (<= (cl-ord c) 55))))
|
||||
|
||||
(define cl-binary? (fn (c) (or (= c "0") (= c "1"))))
|
||||
|
||||
(define cl-ws? (fn (c) (or (= c " ") (= c "\t") (= c "\n") (= c "\r"))))
|
||||
|
||||
(define
|
||||
cl-alpha?
|
||||
(fn
|
||||
(c)
|
||||
(or
|
||||
(and (>= (cl-ord c) 65) (<= (cl-ord c) 90))
|
||||
(and (>= (cl-ord c) 97) (<= (cl-ord c) 122)))))
|
||||
|
||||
;; Characters that end a token (whitespace + terminating macro chars)
|
||||
(define
|
||||
cl-terminating?
|
||||
(fn
|
||||
(c)
|
||||
(or
|
||||
(cl-ws? c)
|
||||
(= c "(")
|
||||
(= c ")")
|
||||
(= c "\"")
|
||||
(= c ";")
|
||||
(= c "`")
|
||||
(= c ","))))
|
||||
|
||||
;; Symbol constituent: not terminating, not reader-special
|
||||
(define
|
||||
cl-sym-char?
|
||||
(fn
|
||||
(c)
|
||||
(not
|
||||
(or
|
||||
(cl-terminating? c)
|
||||
(= c "#")
|
||||
(= c "|")
|
||||
(= c "\\")
|
||||
(= c "'")))))
|
||||
|
||||
;; ── named character table ─────────────────────────────────────────
|
||||
|
||||
(define
|
||||
cl-named-chars
|
||||
{:space " "
|
||||
:newline "\n"
|
||||
:tab "\t"
|
||||
:return "\r"
|
||||
:backspace (char-from-code 8)
|
||||
:rubout (char-from-code 127)
|
||||
:delete (char-from-code 127)
|
||||
:escape (char-from-code 27)
|
||||
:altmode (char-from-code 27)
|
||||
:null (char-from-code 0)
|
||||
:nul (char-from-code 0)
|
||||
:page (char-from-code 12)
|
||||
:formfeed (char-from-code 12)})
|
||||
|
||||
;; ── main tokenizer ────────────────────────────────────────────────
|
||||
|
||||
(define
|
||||
cl-tokenize
|
||||
(fn
|
||||
(src)
|
||||
(let
|
||||
((pos 0) (n (string-length src)) (toks (list)))
|
||||
|
||||
(define at (fn () (if (< pos n) (substring src pos (+ pos 1)) nil)))
|
||||
(define peek1 (fn () (if (< (+ pos 1) n) (substring src (+ pos 1) (+ pos 2)) nil)))
|
||||
(define adv (fn () (set! pos (+ pos 1))))
|
||||
|
||||
;; Advance while predicate holds; return substring from start to end
|
||||
(define
|
||||
read-while
|
||||
(fn
|
||||
(pred)
|
||||
(let
|
||||
((start pos))
|
||||
(define
|
||||
rw-loop
|
||||
(fn
|
||||
()
|
||||
(when
|
||||
(and (at) (pred (at)))
|
||||
(do (adv) (rw-loop)))))
|
||||
(rw-loop)
|
||||
(substring src start pos))))
|
||||
|
||||
(define
|
||||
skip-line
|
||||
(fn
|
||||
()
|
||||
(when
|
||||
(and (at) (not (= (at) "\n")))
|
||||
(do (adv) (skip-line)))))
|
||||
|
||||
(define
|
||||
skip-block
|
||||
(fn
|
||||
(depth)
|
||||
(when
|
||||
(at)
|
||||
(cond
|
||||
((and (= (at) "#") (= (peek1) "|"))
|
||||
(do (adv) (adv) (skip-block (+ depth 1))))
|
||||
((and (= (at) "|") (= (peek1) "#"))
|
||||
(do
|
||||
(adv)
|
||||
(adv)
|
||||
(when (> depth 1) (skip-block (- depth 1)))))
|
||||
(:else (do (adv) (skip-block depth)))))))
|
||||
|
||||
;; Read string literal — called with pos just past opening "
|
||||
(define
|
||||
read-str
|
||||
(fn
|
||||
(acc)
|
||||
(if
|
||||
(not (at))
|
||||
acc
|
||||
(cond
|
||||
((= (at) "\"") (do (adv) acc))
|
||||
((= (at) "\\")
|
||||
(do
|
||||
(adv)
|
||||
(let
|
||||
((e (at)))
|
||||
(adv)
|
||||
(read-str
|
||||
(str
|
||||
acc
|
||||
(cond
|
||||
((= e "n") "\n")
|
||||
((= e "t") "\t")
|
||||
((= e "r") "\r")
|
||||
((= e "\"") "\"")
|
||||
((= e "\\") "\\")
|
||||
(:else e)))))))
|
||||
(:else
|
||||
(let
|
||||
((c (at)))
|
||||
(adv)
|
||||
(read-str (str acc c))))))))
|
||||
|
||||
;; Read #\ char literal — called with pos just past the backslash
|
||||
(define
|
||||
read-char-lit
|
||||
(fn
|
||||
()
|
||||
(let
|
||||
((first (at)))
|
||||
(adv)
|
||||
(let
|
||||
((rest (if (and (at) (cl-alpha? (at))) (read-while cl-alpha?) "")))
|
||||
(if
|
||||
(= rest "")
|
||||
first
|
||||
(let
|
||||
((name (downcase (str first rest))))
|
||||
(or (get cl-named-chars name) first)))))))
|
||||
|
||||
;; Number scanner — called with pos just past first digit(s).
|
||||
;; acc holds what was already consumed (first digit or sign+digit).
|
||||
(define
|
||||
scan-num
|
||||
(fn
|
||||
(p acc)
|
||||
(let
|
||||
((more (read-while cl-digit?)))
|
||||
(set! acc (str acc more))
|
||||
(cond
|
||||
;; ratio N/D
|
||||
((and (at) (= (at) "/") (peek1) (cl-digit? (peek1)))
|
||||
(do
|
||||
(adv)
|
||||
(let
|
||||
((denom (read-while cl-digit?)))
|
||||
{:type "ratio" :value (str acc "/" denom) :pos p})))
|
||||
;; float: decimal point N.M[eE]
|
||||
((and (at) (= (at) ".") (peek1) (cl-digit? (peek1)))
|
||||
(do
|
||||
(adv)
|
||||
(let
|
||||
((frac (read-while cl-digit?)))
|
||||
(set! acc (str acc "." frac))
|
||||
(when
|
||||
(and (at) (or (= (at) "e") (= (at) "E")))
|
||||
(do
|
||||
(set! acc (str acc (at)))
|
||||
(adv)
|
||||
(when
|
||||
(and (at) (or (= (at) "+") (= (at) "-")))
|
||||
(do (set! acc (str acc (at))) (adv)))
|
||||
(set! acc (str acc (read-while cl-digit?)))))
|
||||
{:type "float" :value acc :pos p})))
|
||||
;; float: exponent only NeE
|
||||
((and (at) (or (= (at) "e") (= (at) "E")))
|
||||
(do
|
||||
(set! acc (str acc (at)))
|
||||
(adv)
|
||||
(when
|
||||
(and (at) (or (= (at) "+") (= (at) "-")))
|
||||
(do (set! acc (str acc (at))) (adv)))
|
||||
(set! acc (str acc (read-while cl-digit?)))
|
||||
{:type "float" :value acc :pos p}))
|
||||
(:else {:type "integer" :value acc :pos p})))))
|
||||
|
||||
(define
|
||||
read-radix
|
||||
(fn
|
||||
(letter p)
|
||||
(let
|
||||
((pred
|
||||
(cond
|
||||
((or (= letter "x") (= letter "X")) cl-hex?)
|
||||
((or (= letter "b") (= letter "B")) cl-binary?)
|
||||
((or (= letter "o") (= letter "O")) cl-octal?)
|
||||
(:else cl-digit?))))
|
||||
{:type "integer"
|
||||
:value (str "#" letter (read-while pred))
|
||||
:pos p})))
|
||||
|
||||
(define emit (fn (tok) (append! toks tok)))
|
||||
|
||||
(define
|
||||
scan
|
||||
(fn
|
||||
()
|
||||
(when
|
||||
(< pos n)
|
||||
(let
|
||||
((c (at)) (p pos))
|
||||
(cond
|
||||
((cl-ws? c) (do (adv) (scan)))
|
||||
((= c ";") (do (adv) (skip-line) (scan)))
|
||||
((= c "(") (do (adv) (emit (cl-make-tok "lparen" "(" p)) (scan)))
|
||||
((= c ")") (do (adv) (emit (cl-make-tok "rparen" ")" p)) (scan)))
|
||||
((= c "'") (do (adv) (emit (cl-make-tok "quote" "'" p)) (scan)))
|
||||
((= c "`") (do (adv) (emit (cl-make-tok "backquote" "`" p)) (scan)))
|
||||
((= c ",")
|
||||
(do
|
||||
(adv)
|
||||
(if
|
||||
(= (at) "@")
|
||||
(do (adv) (emit (cl-make-tok "comma-at" ",@" p)))
|
||||
(emit (cl-make-tok "comma" "," p)))
|
||||
(scan)))
|
||||
((= c "\"")
|
||||
(do
|
||||
(adv)
|
||||
(emit (cl-make-tok "string" (read-str "") p))
|
||||
(scan)))
|
||||
;; :keyword
|
||||
((= c ":")
|
||||
(do
|
||||
(adv)
|
||||
(emit (cl-make-tok "keyword" (upcase (read-while cl-sym-char?)) p))
|
||||
(scan)))
|
||||
;; dispatch macro #
|
||||
((= c "#")
|
||||
(do
|
||||
(adv)
|
||||
(let
|
||||
((d (at)))
|
||||
(cond
|
||||
((= d "'") (do (adv) (emit (cl-make-tok "hash-quote" "#'" p)) (scan)))
|
||||
((= d "(") (do (adv) (emit (cl-make-tok "hash-paren" "#(" p)) (scan)))
|
||||
((= d ":")
|
||||
(do
|
||||
(adv)
|
||||
(emit
|
||||
(cl-make-tok "uninterned" (upcase (read-while cl-sym-char?)) p))
|
||||
(scan)))
|
||||
((= d "|") (do (adv) (skip-block 1) (scan)))
|
||||
((= d "\\")
|
||||
(do (adv) (emit (cl-make-tok "char" (read-char-lit) p)) (scan)))
|
||||
((or (= d "x") (= d "X"))
|
||||
(do (adv) (emit (read-radix d p)) (scan)))
|
||||
((or (= d "b") (= d "B"))
|
||||
(do (adv) (emit (read-radix d p)) (scan)))
|
||||
((or (= d "o") (= d "O"))
|
||||
(do (adv) (emit (read-radix d p)) (scan)))
|
||||
(:else (scan))))))
|
||||
;; standalone dot, float .5, or symbol starting with dots
|
||||
((= c ".")
|
||||
(do
|
||||
(adv)
|
||||
(cond
|
||||
((or (not (at)) (cl-terminating? (at)))
|
||||
(do (emit (cl-make-tok "dot" "." p)) (scan)))
|
||||
((cl-digit? (at))
|
||||
(do
|
||||
(emit
|
||||
(cl-make-tok "float" (str "0." (read-while cl-digit?)) p))
|
||||
(scan)))
|
||||
(:else
|
||||
(do
|
||||
(emit
|
||||
(cl-make-tok "symbol" (upcase (str "." (read-while cl-sym-char?))) p))
|
||||
(scan))))))
|
||||
;; sign followed by digit → number
|
||||
((and (or (= c "+") (= c "-")) (peek1) (cl-digit? (peek1)))
|
||||
(do
|
||||
(adv)
|
||||
(let
|
||||
((first-d (at)))
|
||||
(adv)
|
||||
(emit (scan-num p (str c first-d))))
|
||||
(scan)))
|
||||
;; decimal digit → number
|
||||
((cl-digit? c)
|
||||
(do
|
||||
(adv)
|
||||
(emit (scan-num p c))
|
||||
(scan)))
|
||||
;; symbol constituent (includes bare +, -, etc.)
|
||||
((cl-sym-char? c)
|
||||
(do
|
||||
(emit (cl-make-tok "symbol" (upcase (read-while cl-sym-char?)) p))
|
||||
(scan)))
|
||||
(:else (do (adv) (scan))))))))
|
||||
|
||||
(scan)
|
||||
(append! toks (cl-make-tok "eof" nil n))
|
||||
toks)))
|
||||
98
lib/common-lisp/test.sh
Executable file
98
lib/common-lisp/test.sh
Executable file
@@ -0,0 +1,98 @@
|
||||
#!/usr/bin/env bash
|
||||
# Common Lisp on SX test runner — pipes directly to sx_server.exe
|
||||
#
|
||||
# Usage:
|
||||
# bash lib/common-lisp/test.sh # all tests
|
||||
# bash lib/common-lisp/test.sh -v # verbose
|
||||
# bash lib/common-lisp/test.sh tests/read.sx # one file
|
||||
|
||||
set -euo pipefail
|
||||
cd "$(git rev-parse --show-toplevel)"
|
||||
|
||||
SX_SERVER="hosts/ocaml/_build/default/bin/sx_server.exe"
|
||||
if [ ! -x "$SX_SERVER" ]; then
|
||||
MAIN_ROOT=$(git worktree list | awk 'NR==1{print $1}')
|
||||
if [ -x "$MAIN_ROOT/$SX_SERVER" ]; then
|
||||
SX_SERVER="$MAIN_ROOT/$SX_SERVER"
|
||||
else
|
||||
echo "ERROR: sx_server.exe not found"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
VERBOSE=""
|
||||
FILES=()
|
||||
for arg in "$@"; do
|
||||
case "$arg" in
|
||||
-v|--verbose) VERBOSE=1 ;;
|
||||
*) FILES+=("$arg") ;;
|
||||
esac
|
||||
done
|
||||
|
||||
if [ ${#FILES[@]} -eq 0 ]; then
|
||||
mapfile -t FILES < <(find lib/common-lisp/tests -maxdepth 2 -name '*.sx' | sort)
|
||||
fi
|
||||
|
||||
TOTAL_PASS=0
|
||||
TOTAL_FAIL=0
|
||||
FAILED_FILES=()
|
||||
|
||||
for FILE in "${FILES[@]}"; do
|
||||
[ -f "$FILE" ] || { echo "skip $FILE (not found)"; continue; }
|
||||
TMPFILE=$(mktemp)
|
||||
cat > "$TMPFILE" <<EPOCHS
|
||||
(epoch 1)
|
||||
(load "lib/common-lisp/reader.sx")
|
||||
(epoch 2)
|
||||
(load "$FILE")
|
||||
(epoch 3)
|
||||
(eval "(list cl-test-pass cl-test-fail)")
|
||||
EPOCHS
|
||||
|
||||
OUTPUT=$(timeout 60 "$SX_SERVER" < "$TMPFILE" 2>&1 || true)
|
||||
rm -f "$TMPFILE"
|
||||
|
||||
LINE=$(echo "$OUTPUT" | awk '/^\(ok-len 3 / {getline; print; exit}' || true)
|
||||
if [ -z "$LINE" ]; then
|
||||
LINE=$(echo "$OUTPUT" | grep -E '^\(ok 3 \([0-9]+ [0-9]+\)\)' | tail -1 \
|
||||
| sed -E 's/^\(ok 3 //; s/\)$//' || true)
|
||||
fi
|
||||
if [ -z "$LINE" ]; then
|
||||
echo "✗ $FILE: could not extract summary"
|
||||
echo "$OUTPUT" | tail -20
|
||||
TOTAL_FAIL=$((TOTAL_FAIL + 1))
|
||||
FAILED_FILES+=("$FILE")
|
||||
continue
|
||||
fi
|
||||
P=$(echo "$LINE" | sed -E 's/^\(([0-9]+) ([0-9]+)\).*/\1/')
|
||||
F=$(echo "$LINE" | sed -E 's/^\(([0-9]+) ([0-9]+)\).*/\2/')
|
||||
TOTAL_PASS=$((TOTAL_PASS + P))
|
||||
TOTAL_FAIL=$((TOTAL_FAIL + F))
|
||||
if [ "$F" -gt 0 ]; then
|
||||
FAILED_FILES+=("$FILE")
|
||||
printf '✗ %-40s %d/%d\n' "$FILE" "$P" "$((P+F))"
|
||||
TMPFILE2=$(mktemp)
|
||||
cat > "$TMPFILE2" <<EPOCHS
|
||||
(epoch 1)
|
||||
(load "lib/common-lisp/reader.sx")
|
||||
(epoch 2)
|
||||
(load "$FILE")
|
||||
(epoch 3)
|
||||
(eval "(map (fn (f) (get f \"name\")) cl-test-fails)")
|
||||
EPOCHS
|
||||
FAILS=$(timeout 60 "$SX_SERVER" < "$TMPFILE2" 2>&1 | grep -E '^\(ok 3 ' || true)
|
||||
rm -f "$TMPFILE2"
|
||||
echo " $FAILS"
|
||||
elif [ "$VERBOSE" = "1" ]; then
|
||||
printf '✓ %-40s %d passed\n' "$FILE" "$P"
|
||||
fi
|
||||
done
|
||||
|
||||
TOTAL=$((TOTAL_PASS + TOTAL_FAIL))
|
||||
if [ $TOTAL_FAIL -eq 0 ]; then
|
||||
echo "✓ $TOTAL_PASS/$TOTAL common-lisp-on-sx tests passed"
|
||||
else
|
||||
echo "✗ $TOTAL_PASS/$TOTAL passed, $TOTAL_FAIL failed in: ${FAILED_FILES[*]}"
|
||||
fi
|
||||
|
||||
[ $TOTAL_FAIL -eq 0 ]
|
||||
180
lib/common-lisp/tests/read.sx
Normal file
180
lib/common-lisp/tests/read.sx
Normal file
@@ -0,0 +1,180 @@
|
||||
;; Common Lisp tokenizer tests
|
||||
|
||||
(define cl-test-pass 0)
|
||||
(define cl-test-fail 0)
|
||||
(define cl-test-fails (list))
|
||||
|
||||
(define
|
||||
cl-test
|
||||
(fn
|
||||
(name actual expected)
|
||||
(if
|
||||
(= actual expected)
|
||||
(set! cl-test-pass (+ cl-test-pass 1))
|
||||
(do
|
||||
(set! cl-test-fail (+ cl-test-fail 1))
|
||||
(append! cl-test-fails {:name name :expected expected :actual actual})))))
|
||||
|
||||
;; Helpers: extract types and values from token stream (drops eof)
|
||||
(define
|
||||
cl-tok-types
|
||||
(fn
|
||||
(src)
|
||||
(map
|
||||
(fn (t) (get t "type"))
|
||||
(filter (fn (t) (not (= (get t "type") "eof"))) (cl-tokenize src)))))
|
||||
|
||||
(define
|
||||
cl-tok-values
|
||||
(fn
|
||||
(src)
|
||||
(map
|
||||
(fn (t) (get t "value"))
|
||||
(filter (fn (t) (not (= (get t "type") "eof"))) (cl-tokenize src)))))
|
||||
|
||||
(define
|
||||
cl-tok-first
|
||||
(fn (src) (nth (cl-tokenize src) 0)))
|
||||
|
||||
;; ── symbols ───────────────────────────────────────────────────────
|
||||
|
||||
(cl-test "symbol: bare lowercase" (cl-tok-values "foo") (list "FOO"))
|
||||
(cl-test "symbol: uppercase" (cl-tok-values "BAR") (list "BAR"))
|
||||
(cl-test "symbol: mixed case folded" (cl-tok-values "FooBar") (list "FOOBAR"))
|
||||
(cl-test "symbol: with hyphen" (cl-tok-values "foo-bar") (list "FOO-BAR"))
|
||||
(cl-test "symbol: with star" (cl-tok-values "*special*") (list "*SPECIAL*"))
|
||||
(cl-test "symbol: with question" (cl-tok-values "null?") (list "NULL?"))
|
||||
(cl-test "symbol: with exclamation" (cl-tok-values "set!") (list "SET!"))
|
||||
(cl-test "symbol: plus sign alone" (cl-tok-values "+") (list "+"))
|
||||
(cl-test "symbol: minus sign alone" (cl-tok-values "-") (list "-"))
|
||||
(cl-test "symbol: type is symbol" (cl-tok-types "foo") (list "symbol"))
|
||||
|
||||
;; ── package-qualified symbols ─────────────────────────────────────
|
||||
|
||||
(cl-test "symbol: pkg:sym external" (cl-tok-values "cl:car") (list "CL:CAR"))
|
||||
(cl-test "symbol: pkg::sym internal" (cl-tok-values "pkg::foo") (list "PKG::FOO"))
|
||||
(cl-test "symbol: cl:car type" (cl-tok-types "cl:car") (list "symbol"))
|
||||
|
||||
;; ── keywords ──────────────────────────────────────────────────────
|
||||
|
||||
(cl-test "keyword: basic" (cl-tok-values ":foo") (list "FOO"))
|
||||
(cl-test "keyword: type" (cl-tok-types ":foo") (list "keyword"))
|
||||
(cl-test "keyword: upcase" (cl-tok-values ":hello-world") (list "HELLO-WORLD"))
|
||||
(cl-test "keyword: multiple" (cl-tok-types ":a :b :c") (list "keyword" "keyword" "keyword"))
|
||||
|
||||
;; ── integers ──────────────────────────────────────────────────────
|
||||
|
||||
(cl-test "integer: zero" (cl-tok-values "0") (list "0"))
|
||||
(cl-test "integer: positive" (cl-tok-values "42") (list "42"))
|
||||
(cl-test "integer: negative" (cl-tok-values "-5") (list "-5"))
|
||||
(cl-test "integer: positive-sign" (cl-tok-values "+3") (list "+3"))
|
||||
(cl-test "integer: type" (cl-tok-types "42") (list "integer"))
|
||||
(cl-test "integer: multi-digit" (cl-tok-values "12345678") (list "12345678"))
|
||||
|
||||
;; ── hex, binary, octal ───────────────────────────────────────────
|
||||
|
||||
(cl-test "hex: lowercase x" (cl-tok-values "#xFF") (list "#xFF"))
|
||||
(cl-test "hex: uppercase X" (cl-tok-values "#XFF") (list "#XFF"))
|
||||
(cl-test "hex: type" (cl-tok-types "#xFF") (list "integer"))
|
||||
(cl-test "hex: zero" (cl-tok-values "#x0") (list "#x0"))
|
||||
(cl-test "binary: #b" (cl-tok-values "#b1010") (list "#b1010"))
|
||||
(cl-test "binary: type" (cl-tok-types "#b1010") (list "integer"))
|
||||
(cl-test "octal: #o" (cl-tok-values "#o17") (list "#o17"))
|
||||
(cl-test "octal: type" (cl-tok-types "#o17") (list "integer"))
|
||||
|
||||
;; ── floats ────────────────────────────────────────────────────────
|
||||
|
||||
(cl-test "float: basic" (cl-tok-values "3.14") (list "3.14"))
|
||||
(cl-test "float: type" (cl-tok-types "3.14") (list "float"))
|
||||
(cl-test "float: negative" (cl-tok-values "-2.5") (list "-2.5"))
|
||||
(cl-test "float: exponent" (cl-tok-values "1.0e10") (list "1.0e10"))
|
||||
(cl-test "float: neg exponent" (cl-tok-values "1.5e-3") (list "1.5e-3"))
|
||||
(cl-test "float: leading dot" (cl-tok-values ".5") (list "0.5"))
|
||||
(cl-test "float: exp only" (cl-tok-values "1e5") (list "1e5"))
|
||||
|
||||
;; ── ratios ────────────────────────────────────────────────────────
|
||||
|
||||
(cl-test "ratio: 1/3" (cl-tok-values "1/3") (list "1/3"))
|
||||
(cl-test "ratio: type" (cl-tok-types "1/3") (list "ratio"))
|
||||
(cl-test "ratio: 22/7" (cl-tok-values "22/7") (list "22/7"))
|
||||
(cl-test "ratio: negative" (cl-tok-values "-1/2") (list "-1/2"))
|
||||
|
||||
;; ── strings ───────────────────────────────────────────────────────
|
||||
|
||||
(cl-test "string: empty" (cl-tok-values "\"\"") (list ""))
|
||||
(cl-test "string: basic" (cl-tok-values "\"hello\"") (list "hello"))
|
||||
(cl-test "string: type" (cl-tok-types "\"hello\"") (list "string"))
|
||||
(cl-test "string: with space" (cl-tok-values "\"hello world\"") (list "hello world"))
|
||||
(cl-test "string: escaped quote" (cl-tok-values "\"say \\\"hi\\\"\"") (list "say \"hi\""))
|
||||
(cl-test "string: escaped backslash" (cl-tok-values "\"a\\\\b\"") (list "a\\b"))
|
||||
(cl-test "string: newline escape" (cl-tok-values "\"a\\nb\"") (list "a\nb"))
|
||||
(cl-test "string: tab escape" (cl-tok-values "\"a\\tb\"") (list "a\tb"))
|
||||
|
||||
;; ── characters ────────────────────────────────────────────────────
|
||||
|
||||
(cl-test "char: lowercase a" (cl-tok-values "#\\a") (list "a"))
|
||||
(cl-test "char: uppercase A" (cl-tok-values "#\\A") (list "A"))
|
||||
(cl-test "char: digit" (cl-tok-values "#\\1") (list "1"))
|
||||
(cl-test "char: type" (cl-tok-types "#\\a") (list "char"))
|
||||
(cl-test "char: Space" (cl-tok-values "#\\Space") (list " "))
|
||||
(cl-test "char: Newline" (cl-tok-values "#\\Newline") (list "\n"))
|
||||
(cl-test "char: Tab" (cl-tok-values "#\\Tab") (list "\t"))
|
||||
(cl-test "char: Return" (cl-tok-values "#\\Return") (list "\r"))
|
||||
|
||||
;; ── reader macros ─────────────────────────────────────────────────
|
||||
|
||||
(cl-test "quote: type" (cl-tok-types "'x") (list "quote" "symbol"))
|
||||
(cl-test "backquote: type" (cl-tok-types "`x") (list "backquote" "symbol"))
|
||||
(cl-test "comma: type" (cl-tok-types ",x") (list "comma" "symbol"))
|
||||
(cl-test "comma-at: type" (cl-tok-types ",@x") (list "comma-at" "symbol"))
|
||||
(cl-test "hash-quote: type" (cl-tok-types "#'foo") (list "hash-quote" "symbol"))
|
||||
(cl-test "hash-paren: type" (cl-tok-types "#(1 2)") (list "hash-paren" "integer" "integer" "rparen"))
|
||||
|
||||
;; ── uninterned ────────────────────────────────────────────────────
|
||||
|
||||
(cl-test "uninterned: type" (cl-tok-types "#:foo") (list "uninterned"))
|
||||
(cl-test "uninterned: value upcase" (cl-tok-values "#:foo") (list "FOO"))
|
||||
(cl-test "uninterned: compound" (cl-tok-values "#:my-sym") (list "MY-SYM"))
|
||||
|
||||
;; ── parens and structure ──────────────────────────────────────────
|
||||
|
||||
(cl-test "paren: empty list" (cl-tok-types "()") (list "lparen" "rparen"))
|
||||
(cl-test "paren: nested" (cl-tok-types "((a))") (list "lparen" "lparen" "symbol" "rparen" "rparen"))
|
||||
(cl-test "dot: standalone" (cl-tok-types "(a . b)") (list "lparen" "symbol" "dot" "symbol" "rparen"))
|
||||
|
||||
;; ── comments ──────────────────────────────────────────────────────
|
||||
|
||||
(cl-test "comment: line" (cl-tok-types "; comment\nfoo") (list "symbol"))
|
||||
(cl-test "comment: inline" (cl-tok-values "foo ; bar\nbaz") (list "FOO" "BAZ"))
|
||||
(cl-test "block-comment: basic" (cl-tok-types "#| hello |# foo") (list "symbol"))
|
||||
(cl-test "block-comment: nested" (cl-tok-types "#| a #| b |# c |# x") (list "symbol"))
|
||||
|
||||
;; ── combined ──────────────────────────────────────────────────────
|
||||
|
||||
(cl-test
|
||||
"combined: defun skeleton"
|
||||
(cl-tok-types "(defun foo (x) x)")
|
||||
(list "lparen" "symbol" "symbol" "lparen" "symbol" "rparen" "symbol" "rparen"))
|
||||
|
||||
(cl-test
|
||||
"combined: let form"
|
||||
(cl-tok-types "(let ((x 1)) x)")
|
||||
(list
|
||||
"lparen"
|
||||
"symbol"
|
||||
"lparen"
|
||||
"lparen"
|
||||
"symbol"
|
||||
"integer"
|
||||
"rparen"
|
||||
"rparen"
|
||||
"symbol"
|
||||
"rparen"))
|
||||
|
||||
(cl-test
|
||||
"combined: whitespace skip"
|
||||
(cl-tok-values " foo bar baz ")
|
||||
(list "FOO" "BAR" "BAZ"))
|
||||
|
||||
(cl-test "eof: present" (get (nth (cl-tokenize "") 0) "type") "eof")
|
||||
(cl-test "eof: at end of tokens" (get (nth (cl-tokenize "x") 1) "type") "eof")
|
||||
@@ -50,7 +50,7 @@ Core mapping:
|
||||
## Roadmap
|
||||
|
||||
### Phase 1 — reader + parser
|
||||
- [ ] Tokenizer: symbols (with package qualification `pkg:sym` / `pkg::sym`), numbers (int, float, ratio `1/3`, `#xFF`, `#b1010`, `#o17`), strings `"…"` with `\` escapes, characters `#\Space` `#\Newline` `#\a`, comments `;`, block comments `#| … |#`
|
||||
- [x] Tokenizer: symbols (with package qualification `pkg:sym` / `pkg::sym`), numbers (int, float, ratio `1/3`, `#xFF`, `#b1010`, `#o17`), strings `"…"` with `\` escapes, characters `#\Space` `#\Newline` `#\a`, comments `;`, block comments `#| … |#`
|
||||
- [ ] Reader: list, dotted pair, quote `'`, function `#'`, quasiquote `` ` ``, unquote `,`, splice `,@`, vector `#(…)`, uninterned `#:foo`, nil/t literals
|
||||
- [ ] Parser: lambda lists with `&optional` `&rest` `&key` `&aux` `&allow-other-keys`, defaults, supplied-p variables
|
||||
- [ ] Unit tests in `lib/common-lisp/tests/read.sx`
|
||||
@@ -114,7 +114,7 @@ Core mapping:
|
||||
|
||||
_Newest first._
|
||||
|
||||
- _(none yet)_
|
||||
- 2026-04-25: Phase 1 tokenizer — 79 tests green. `lib/common-lisp/reader.sx` + `tests/read.sx` + `test.sh`. Handles symbols (pkg:sym, pkg::sym), integers, floats, ratios, hex/binary/octal, strings, #\ chars, reader macros (#' #( #: ,@), line/block comments. Key gotcha: SX `str` for string concat (not `concat`), substring-based read-while.
|
||||
|
||||
## Blockers
|
||||
|
||||
|
||||
Reference in New Issue
Block a user