diff --git a/lib/smalltalk/test.sh b/lib/smalltalk/test.sh new file mode 100755 index 00000000..3f6bf531 --- /dev/null +++ b/lib/smalltalk/test.sh @@ -0,0 +1,99 @@ +#!/usr/bin/env bash +# Fast Smalltalk-on-SX test runner — pipes directly to sx_server.exe. +# Mirrors lib/haskell/test.sh. +# +# Usage: +# bash lib/smalltalk/test.sh # run all tests +# bash lib/smalltalk/test.sh -v # verbose +# bash lib/smalltalk/test.sh tests/tokenize.sx # run one file + +set -uo pipefail +cd "$(git rev-parse --show-toplevel)" + +SX_SERVER="hosts/ocaml/_build/default/bin/sx_server.exe" +if [ ! -x "$SX_SERVER" ]; then + MAIN_ROOT=$(git worktree list | head -1 | awk '{print $1}') + if [ -x "$MAIN_ROOT/$SX_SERVER" ]; then + SX_SERVER="$MAIN_ROOT/$SX_SERVER" + else + echo "ERROR: sx_server.exe not found. Run: cd hosts/ocaml && dune build" + exit 1 + fi +fi + +VERBOSE="" +FILES=() +for arg in "$@"; do + case "$arg" in + -v|--verbose) VERBOSE=1 ;; + *) FILES+=("$arg") ;; + esac +done + +if [ ${#FILES[@]} -eq 0 ]; then + mapfile -t FILES < <(find lib/smalltalk/tests -maxdepth 2 -name '*.sx' | sort) +fi + +TOTAL_PASS=0 +TOTAL_FAIL=0 +FAILED_FILES=() + +for FILE in "${FILES[@]}"; do + [ -f "$FILE" ] || { echo "skip $FILE (not found)"; continue; } + TMPFILE=$(mktemp) + cat > "$TMPFILE" <&1 || true) + rm -f "$TMPFILE" + + LINE=$(echo "$OUTPUT" | awk '/^\(ok-len 3 / {getline; print; exit}') + if [ -z "$LINE" ]; then + LINE=$(echo "$OUTPUT" | grep -E '^\(ok 3 \([0-9]+ [0-9]+\)\)' | tail -1 \ + | sed -E 's/^\(ok 3 //; s/\)$//') + fi + if [ -z "$LINE" ]; then + echo "X $FILE: could not extract summary" + echo "$OUTPUT" | tail -30 + TOTAL_FAIL=$((TOTAL_FAIL + 1)) + FAILED_FILES+=("$FILE") + continue + fi + P=$(echo "$LINE" | sed -E 's/^\(([0-9]+) ([0-9]+)\).*/\1/') + F=$(echo "$LINE" | sed -E 's/^\(([0-9]+) ([0-9]+)\).*/\2/') + TOTAL_PASS=$((TOTAL_PASS + P)) + TOTAL_FAIL=$((TOTAL_FAIL + F)) + if [ "$F" -gt 0 ]; then + FAILED_FILES+=("$FILE") + printf 'X %-40s %d/%d\n' "$FILE" "$P" "$((P+F))" + TMPFILE2=$(mktemp) + cat > "$TMPFILE2" <&1 | grep -E '^\(ok 3 ' || true) + rm -f "$TMPFILE2" + echo " $FAILS" + elif [ "$VERBOSE" = "1" ]; then + printf 'OK %-40s %d passed\n' "$FILE" "$P" + fi +done + +TOTAL=$((TOTAL_PASS + TOTAL_FAIL)) +if [ $TOTAL_FAIL -eq 0 ]; then + echo "OK $TOTAL_PASS/$TOTAL smalltalk-on-sx tests passed" +else + echo "FAIL $TOTAL_PASS/$TOTAL passed, $TOTAL_FAIL failed in: ${FAILED_FILES[*]}" +fi + +[ $TOTAL_FAIL -eq 0 ] diff --git a/lib/smalltalk/tests/tokenize.sx b/lib/smalltalk/tests/tokenize.sx new file mode 100644 index 00000000..23f5fdb3 --- /dev/null +++ b/lib/smalltalk/tests/tokenize.sx @@ -0,0 +1,362 @@ +;; Smalltalk tokenizer tests. +;; +;; Lightweight runner: each test checks actual vs expected with structural +;; equality and accumulates pass/fail counters. Final summary read by +;; lib/smalltalk/test.sh. + +(define + st-deep=? + (fn + (a b) + (cond + ((= a b) true) + ((and (dict? a) (dict? b)) + (let + ((ak (keys a)) (bk (keys b))) + (if + (not (= (len ak) (len bk))) + false + (every? + (fn + (k) + (and (has-key? b k) (st-deep=? (get a k) (get b k)))) + ak)))) + ((and (list? a) (list? b)) + (if + (not (= (len a) (len b))) + false + (let + ((i 0) (ok true)) + (begin + (define + de-loop + (fn + () + (when + (and ok (< i (len a))) + (begin + (when + (not (st-deep=? (nth a i) (nth b i))) + (set! ok false)) + (set! i (+ i 1)) + (de-loop))))) + (de-loop) + ok)))) + (:else false)))) + +(define st-test-pass 0) +(define st-test-fail 0) +(define st-test-fails (list)) + +(define + st-test + (fn + (name actual expected) + (if + (st-deep=? actual expected) + (set! st-test-pass (+ st-test-pass 1)) + (begin + (set! st-test-fail (+ st-test-fail 1)) + (append! st-test-fails {:actual actual :expected expected :name name}))))) + +;; Strip eof and project to just :type/:value. +(define + st-toks + (fn + (src) + (map + (fn (tok) {:type (get tok :type) :value (get tok :value)}) + (filter + (fn (tok) (not (= (get tok :type) "eof"))) + (st-tokenize src))))) + +;; ── 1. Whitespace / empty ── +(st-test "empty input" (st-toks "") (list)) +(st-test "all whitespace" (st-toks " \t\n ") (list)) + +;; ── 2. Identifiers ── +(st-test + "lowercase ident" + (st-toks "foo") + (list {:type "ident" :value "foo"})) + +(st-test + "capitalised ident" + (st-toks "Foo") + (list {:type "ident" :value "Foo"})) + +(st-test + "underscore ident" + (st-toks "_x") + (list {:type "ident" :value "_x"})) + +(st-test + "digits in ident" + (st-toks "foo123") + (list {:type "ident" :value "foo123"})) + +(st-test + "two idents separated" + (st-toks "foo bar") + (list {:type "ident" :value "foo"} {:type "ident" :value "bar"})) + +;; ── 3. Keyword selectors ── +(st-test + "keyword selector" + (st-toks "foo:") + (list {:type "keyword" :value "foo:"})) + +(st-test + "keyword call" + (st-toks "x at: 1") + (list + {:type "ident" :value "x"} + {:type "keyword" :value "at:"} + {:type "number" :value 1})) + +(st-test + "two-keyword chain stays separate" + (st-toks "at: 1 put: 2") + (list + {:type "keyword" :value "at:"} + {:type "number" :value 1} + {:type "keyword" :value "put:"} + {:type "number" :value 2})) + +(st-test + "ident then assign — not a keyword" + (st-toks "x := 1") + (list + {:type "ident" :value "x"} + {:type "assign" :value ":="} + {:type "number" :value 1})) + +;; ── 4. Numbers ── +(st-test + "integer" + (st-toks "42") + (list {:type "number" :value 42})) + +(st-test + "float" + (st-toks "3.14") + (list {:type "number" :value 3.14})) + +(st-test + "hex radix" + (st-toks "16rFF") + (list + {:type "number" + :value + {:radix 16 :digits "FF" :value 255 :kind "radix"}})) + +(st-test + "binary radix" + (st-toks "2r1011") + (list + {:type "number" + :value + {:radix 2 :digits "1011" :value 11 :kind "radix"}})) + +(st-test + "exponent" + (st-toks "1e3") + (list {:type "number" :value 1000})) + +(st-test + "negative exponent (parser handles minus)" + (st-toks "1.5e-2") + (list {:type "number" :value 0.015})) + +;; ── 5. Strings ── +(st-test + "simple string" + (st-toks "'hi'") + (list {:type "string" :value "hi"})) + +(st-test + "empty string" + (st-toks "''") + (list {:type "string" :value ""})) + +(st-test + "doubled-quote escape" + (st-toks "'a''b'") + (list {:type "string" :value "a'b"})) + +;; ── 6. Characters ── +(st-test + "char literal letter" + (st-toks "$a") + (list {:type "char" :value "a"})) + +(st-test + "char literal punct" + (st-toks "$$") + (list {:type "char" :value "$"})) + +(st-test + "char literal space" + (st-toks "$ ") + (list {:type "char" :value " "})) + +;; ── 7. Symbols ── +(st-test + "symbol ident" + (st-toks "#foo") + (list {:type "symbol" :value "foo"})) + +(st-test + "symbol binary" + (st-toks "#+") + (list {:type "symbol" :value "+"})) + +(st-test + "symbol arrow" + (st-toks "#->") + (list {:type "symbol" :value "->"})) + +(st-test + "symbol keyword chain" + (st-toks "#at:put:") + (list {:type "symbol" :value "at:put:"})) + +(st-test + "quoted symbol with spaces" + (st-toks "#'foo bar'") + (list {:type "symbol" :value "foo bar"})) + +;; ── 8. Literal arrays / byte arrays ── +(st-test + "literal array open" + (st-toks "#(1 2)") + (list + {:type "array-open" :value "#("} + {:type "number" :value 1} + {:type "number" :value 2} + {:type "rparen" :value ")"})) + +(st-test + "byte array open" + (st-toks "#[1 2 3]") + (list + {:type "byte-array-open" :value "#["} + {:type "number" :value 1} + {:type "number" :value 2} + {:type "number" :value 3} + {:type "rbracket" :value "]"})) + +;; ── 9. Binary selectors ── +(st-test "plus" (st-toks "+") (list {:type "binary" :value "+"})) +(st-test "minus" (st-toks "-") (list {:type "binary" :value "-"})) +(st-test "star" (st-toks "*") (list {:type "binary" :value "*"})) +(st-test "double-equal" (st-toks "==") (list {:type "binary" :value "=="})) +(st-test "leq" (st-toks "<=") (list {:type "binary" :value "<="})) +(st-test "geq" (st-toks ">=") (list {:type "binary" :value ">="})) +(st-test "neq" (st-toks "~=") (list {:type "binary" :value "~="})) +(st-test "arrow" (st-toks "->") (list {:type "binary" :value "->"})) +(st-test "comma" (st-toks ",") (list {:type "binary" :value ","})) + +(st-test + "binary in expression" + (st-toks "a + b") + (list + {:type "ident" :value "a"} + {:type "binary" :value "+"} + {:type "ident" :value "b"})) + +;; ── 10. Punctuation ── +(st-test "lparen" (st-toks "(") (list {:type "lparen" :value "("})) +(st-test "rparen" (st-toks ")") (list {:type "rparen" :value ")"})) +(st-test "lbracket" (st-toks "[") (list {:type "lbracket" :value "["})) +(st-test "rbracket" (st-toks "]") (list {:type "rbracket" :value "]"})) +(st-test "lbrace" (st-toks "{") (list {:type "lbrace" :value "{"})) +(st-test "rbrace" (st-toks "}") (list {:type "rbrace" :value "}"})) +(st-test "period" (st-toks ".") (list {:type "period" :value "."})) +(st-test "semi" (st-toks ";") (list {:type "semi" :value ";"})) +(st-test "bar" (st-toks "|") (list {:type "bar" :value "|"})) +(st-test "caret" (st-toks "^") (list {:type "caret" :value "^"})) +(st-test "bang" (st-toks "!") (list {:type "bang" :value "!"})) +(st-test "colon" (st-toks ":") (list {:type "colon" :value ":"})) +(st-test "assign" (st-toks ":=") (list {:type "assign" :value ":="})) + +;; ── 11. Comments ── +(st-test "comment skipped" (st-toks "\"hello\"") (list)) +(st-test + "comment between tokens" + (st-toks "a \"comment\" b") + (list {:type "ident" :value "a"} {:type "ident" :value "b"})) +(st-test + "multi-line comment" + (st-toks "\"line1\nline2\"42") + (list {:type "number" :value 42})) + +;; ── 12. Compound expressions ── +(st-test + "block with params" + (st-toks "[:a :b | a + b]") + (list + {:type "lbracket" :value "["} + {:type "colon" :value ":"} + {:type "ident" :value "a"} + {:type "colon" :value ":"} + {:type "ident" :value "b"} + {:type "bar" :value "|"} + {:type "ident" :value "a"} + {:type "binary" :value "+"} + {:type "ident" :value "b"} + {:type "rbracket" :value "]"})) + +(st-test + "cascade" + (st-toks "x m1; m2") + (list + {:type "ident" :value "x"} + {:type "ident" :value "m1"} + {:type "semi" :value ";"} + {:type "ident" :value "m2"})) + +(st-test + "method body return" + (st-toks "^ self foo") + (list + {:type "caret" :value "^"} + {:type "ident" :value "self"} + {:type "ident" :value "foo"})) + +(st-test + "class declaration head" + (st-toks "Object subclass: #Foo") + (list + {:type "ident" :value "Object"} + {:type "keyword" :value "subclass:"} + {:type "symbol" :value "Foo"})) + +(st-test + "temp declaration" + (st-toks "| t1 t2 |") + (list + {:type "bar" :value "|"} + {:type "ident" :value "t1"} + {:type "ident" :value "t2"} + {:type "bar" :value "|"})) + +(st-test + "chunk separator" + (st-toks "Foo bar !") + (list + {:type "ident" :value "Foo"} + {:type "ident" :value "bar"} + {:type "bang" :value "!"})) + +(st-test + "keyword call with binary precedence" + (st-toks "x foo: 1 + 2") + (list + {:type "ident" :value "x"} + {:type "keyword" :value "foo:"} + {:type "number" :value 1} + {:type "binary" :value "+"} + {:type "number" :value 2})) + +(list st-test-pass st-test-fail) diff --git a/lib/smalltalk/tokenizer.sx b/lib/smalltalk/tokenizer.sx new file mode 100644 index 00000000..e2e47a50 --- /dev/null +++ b/lib/smalltalk/tokenizer.sx @@ -0,0 +1,366 @@ +;; Smalltalk tokenizer. +;; +;; Token types: +;; ident identifier (foo, Foo, _x) +;; keyword selector keyword (foo:) — value is "foo:" with the colon +;; binary binary selector chars run together (+, ==, ->, <=, ~=, ...) +;; number integer or float; radix integers like 16rFF supported +;; string 'hello''world' style +;; char $c +;; symbol #foo, #foo:bar:, #+, #'with spaces' +;; array-open #( +;; byte-array-open #[ +;; lparen rparen lbracket rbracket lbrace rbrace +;; period semi bar caret colon assign bang +;; eof +;; +;; Comments "…" are skipped. + +(define st-make-token (fn (type value pos) {:type type :value value :pos pos})) + +(define st-digit? (fn (c) (and (not (= c nil)) (>= c "0") (<= c "9")))) + +(define + st-letter? + (fn + (c) + (and + (not (= c nil)) + (or (and (>= c "a") (<= c "z")) (and (>= c "A") (<= c "Z")))))) + +(define st-ident-start? (fn (c) (or (st-letter? c) (= c "_")))) + +(define st-ident-char? (fn (c) (or (st-ident-start? c) (st-digit? c)))) + +(define st-ws? (fn (c) (or (= c " ") (= c "\t") (= c "\n") (= c "\r")))) + +(define + st-binary-chars + (list "+" "-" "*" "/" "\\" "~" "<" ">" "=" "@" "%" "&" "?" ",")) + +(define + st-binary-char? + (fn (c) (and (not (= c nil)) (contains? st-binary-chars c)))) + +(define + st-radix-digit? + (fn + (c) + (and + (not (= c nil)) + (or (st-digit? c) (and (>= c "A") (<= c "Z")))))) + +(define + st-tokenize + (fn + (src) + (let + ((tokens (list)) (pos 0) (src-len (len src))) + (define + pk + (fn + (offset) + (if (< (+ pos offset) src-len) (nth src (+ pos offset)) nil))) + (define cur (fn () (pk 0))) + (define advance! (fn (n) (set! pos (+ pos n)))) + (define + push! + (fn + (type value start) + (append! tokens (st-make-token type value start)))) + (define + skip-comment! + (fn + () + (cond + ((>= pos src-len) nil) + ((= (cur) "\"") (advance! 1)) + (else (begin (advance! 1) (skip-comment!)))))) + (define + skip-ws! + (fn + () + (cond + ((>= pos src-len) nil) + ((st-ws? (cur)) (begin (advance! 1) (skip-ws!))) + ((= (cur) "\"") (begin (advance! 1) (skip-comment!) (skip-ws!))) + (else nil)))) + (define + read-ident-chars! + (fn + () + (when + (and (< pos src-len) (st-ident-char? (cur))) + (begin (advance! 1) (read-ident-chars!))))) + (define + read-decimal-digits! + (fn + () + (when + (and (< pos src-len) (st-digit? (cur))) + (begin (advance! 1) (read-decimal-digits!))))) + (define + read-radix-digits! + (fn + () + (when + (and (< pos src-len) (st-radix-digit? (cur))) + (begin (advance! 1) (read-radix-digits!))))) + (define + read-exp-part! + (fn + () + (when + (and + (< pos src-len) + (or (= (cur) "e") (= (cur) "E")) + (let + ((p1 (pk 1)) (p2 (pk 2))) + (or + (st-digit? p1) + (and (or (= p1 "+") (= p1 "-")) (st-digit? p2))))) + (begin + (advance! 1) + (when + (and (< pos src-len) (or (= (cur) "+") (= (cur) "-"))) + (advance! 1)) + (read-decimal-digits!))))) + (define + read-number + (fn + (start) + (begin + (read-decimal-digits!) + (cond + ((and (< pos src-len) (= (cur) "r")) + (let + ((base-str (slice src start pos))) + (begin + (advance! 1) + (let + ((rstart pos)) + (begin + (read-radix-digits!) + (let + ((digits (slice src rstart pos))) + {:radix (parse-number base-str) + :digits digits + :value (parse-radix base-str digits) + :kind "radix"})))))) + ((and + (< pos src-len) + (= (cur) ".") + (st-digit? (pk 1))) + (begin + (advance! 1) + (read-decimal-digits!) + (read-exp-part!) + (parse-number (slice src start pos)))) + (else + (begin + (read-exp-part!) + (parse-number (slice src start pos)))))))) + (define + parse-radix + (fn + (base-str digits) + (let + ((base (parse-number base-str)) + (chars digits) + (n-len (len digits)) + (idx 0) + (acc 0)) + (begin + (define + rd-loop + (fn + () + (when + (< idx n-len) + (let + ((c (nth chars idx))) + (let + ((d (cond + ((and (>= c "0") (<= c "9")) (- (char-code c) 48)) + ((and (>= c "A") (<= c "Z")) (- (char-code c) 55)) + (else 0)))) + (begin + (set! acc (+ (* acc base) d)) + (set! idx (+ idx 1)) + (rd-loop))))))) + (rd-loop) + acc)))) + (define + read-string + (fn + () + (let + ((chars (list))) + (begin + (advance! 1) + (define + loop + (fn + () + (cond + ((>= pos src-len) nil) + ((= (cur) "'") + (cond + ((= (pk 1) "'") + (begin + (append! chars "'") + (advance! 2) + (loop))) + (else (advance! 1)))) + (else + (begin (append! chars (cur)) (advance! 1) (loop)))))) + (loop) + (join "" chars))))) + (define + read-binary-run! + (fn + () + (let + ((start pos)) + (begin + (define + bin-loop + (fn + () + (when + (and (< pos src-len) (st-binary-char? (cur))) + (begin (advance! 1) (bin-loop))))) + (bin-loop) + (slice src start pos))))) + (define + read-symbol + (fn + (start) + (cond + ;; Quoted symbol: #'whatever' + ((= (cur) "'") + (let ((s (read-string))) (push! "symbol" s start))) + ;; Binary-char symbol: #+, #==, #->, #| + ((or (st-binary-char? (cur)) (= (cur) "|")) + (let ((b (read-binary-run!))) + (cond + ((= b "") + ;; lone | wasn't binary; consume it + (begin (advance! 1) (push! "symbol" "|" start))) + (else (push! "symbol" b start))))) + ;; Identifier or keyword chain: #foo, #foo:bar: + ((st-ident-start? (cur)) + (let ((id-start pos)) + (begin + (read-ident-chars!) + (define + kw-loop + (fn + () + (when + (and (< pos src-len) (= (cur) ":")) + (begin + (advance! 1) + (when + (and (< pos src-len) (st-ident-start? (cur))) + (begin (read-ident-chars!) (kw-loop))))))) + (kw-loop) + (push! "symbol" (slice src id-start pos) start)))) + (else + (error + (str "st-tokenize: bad symbol at " pos)))))) + (define + step + (fn + () + (begin + (skip-ws!) + (when + (< pos src-len) + (let + ((start pos) (c (cur))) + (cond + ;; Identifier or keyword + ((st-ident-start? c) + (begin + (read-ident-chars!) + (let + ((word (slice src start pos))) + (cond + ;; ident immediately followed by ':' (and not ':=') => keyword + ((and + (< pos src-len) + (= (cur) ":") + (not (= (pk 1) "="))) + (begin + (advance! 1) + (push! + "keyword" + (str word ":") + start))) + (else (push! "ident" word start)))) + (step))) + ;; Number + ((st-digit? c) + (let + ((v (read-number start))) + (begin (push! "number" v start) (step)))) + ;; String + ((= c "'") + (let + ((s (read-string))) + (begin (push! "string" s start) (step)))) + ;; Character literal + ((= c "$") + (cond + ((>= (+ pos 1) src-len) + (error (str "st-tokenize: $ at end of input"))) + (else + (begin + (advance! 1) + (push! "char" (cur) start) + (advance! 1) + (step))))) + ;; Symbol or array literal + ((= c "#") + (cond + ((= (pk 1) "(") + (begin (advance! 2) (push! "array-open" "#(" start) (step))) + ((= (pk 1) "[") + (begin (advance! 2) (push! "byte-array-open" "#[" start) (step))) + (else + (begin (advance! 1) (read-symbol start) (step))))) + ;; Assignment := or bare colon + ((= c ":") + (cond + ((= (pk 1) "=") + (begin (advance! 2) (push! "assign" ":=" start) (step))) + (else + (begin (advance! 1) (push! "colon" ":" start) (step))))) + ;; Single-char structural punctuation + ((= c "(") (begin (advance! 1) (push! "lparen" "(" start) (step))) + ((= c ")") (begin (advance! 1) (push! "rparen" ")" start) (step))) + ((= c "[") (begin (advance! 1) (push! "lbracket" "[" start) (step))) + ((= c "]") (begin (advance! 1) (push! "rbracket" "]" start) (step))) + ((= c "{") (begin (advance! 1) (push! "lbrace" "{" start) (step))) + ((= c "}") (begin (advance! 1) (push! "rbrace" "}" start) (step))) + ((= c ".") (begin (advance! 1) (push! "period" "." start) (step))) + ((= c ";") (begin (advance! 1) (push! "semi" ";" start) (step))) + ((= c "|") (begin (advance! 1) (push! "bar" "|" start) (step))) + ((= c "^") (begin (advance! 1) (push! "caret" "^" start) (step))) + ((= c "!") (begin (advance! 1) (push! "bang" "!" start) (step))) + ;; Binary selector run + ((st-binary-char? c) + (let + ((b (read-binary-run!))) + (begin (push! "binary" b start) (step)))) + (else + (error + (str + "st-tokenize: unexpected char " + c + " at " + pos))))))))) + (step) + (push! "eof" nil pos) + tokens))) diff --git a/plans/smalltalk-on-sx.md b/plans/smalltalk-on-sx.md index 2d4f47f1..51412d67 100644 --- a/plans/smalltalk-on-sx.md +++ b/plans/smalltalk-on-sx.md @@ -50,7 +50,7 @@ Core mapping: ## Roadmap ### Phase 1 — tokenizer + parser -- [ ] Tokenizer: identifiers, keywords (`foo:`), binary selectors (`+`, `==`, `,`, `->`, `~=` etc.), numbers (radix `16r1F`, scaled `1.5s2`), strings `'…''…'`, characters `$c`, symbols `#foo` `#'foo bar'` `#+`, byte arrays `#[1 2 3]`, literal arrays `#(1 #foo 'x')`, comments `"…"` +- [x] Tokenizer: identifiers, keywords (`foo:`), binary selectors (`+`, `==`, `,`, `->`, `~=` etc.), numbers (radix `16r1F`; **scaled `1.5s2` deferred**), strings `'…''…'`, characters `$c`, symbols `#foo` `#'foo bar'` `#+`, byte arrays `#[1 2 3]` (open token), literal arrays `#(1 #foo 'x')` (open token), comments `"…"` - [ ] Parser: chunk format (`! !` separators), class definitions (`Object subclass: #X instanceVariableNames: '…' classVariableNames: '…' …`), method definitions (`extend: #Foo with: 'bar ^self'`), pragmas ``, blocks `[:a :b | | t1 t2 | …]`, cascades, message precedence (unary > binary > keyword) - [ ] Unit tests in `lib/smalltalk/tests/parse.sx` @@ -107,7 +107,7 @@ Core mapping: _Newest first. Agent appends on every commit._ -- _(none yet)_ +- 2026-04-25: tokenizer + 63 tests (`lib/smalltalk/tokenizer.sx`, `lib/smalltalk/tests/tokenize.sx`, `lib/smalltalk/test.sh`). All token types covered except scaled decimals `1.5s2` (deferred). `#(` and `#[` emit open tokens; literal-array contents lexed as ordinary tokens for the parser to interpret. ## Blockers