;; Smalltalk tokenizer tests. ;; ;; Lightweight runner: each test checks actual vs expected with structural ;; equality and accumulates pass/fail counters. Final summary read by ;; lib/smalltalk/test.sh. (define st-deep=? (fn (a b) (cond ((= a b) true) ((and (dict? a) (dict? b)) (let ((ak (keys a)) (bk (keys b))) (if (not (= (len ak) (len bk))) false (every? (fn (k) (and (has-key? b k) (st-deep=? (get a k) (get b k)))) ak)))) ((and (list? a) (list? b)) (if (not (= (len a) (len b))) false (let ((i 0) (ok true)) (begin (define de-loop (fn () (when (and ok (< i (len a))) (begin (when (not (st-deep=? (nth a i) (nth b i))) (set! ok false)) (set! i (+ i 1)) (de-loop))))) (de-loop) ok)))) (:else false)))) (define st-test-pass 0) (define st-test-fail 0) (define st-test-fails (list)) (define st-test (fn (name actual expected) (if (st-deep=? actual expected) (set! st-test-pass (+ st-test-pass 1)) (begin (set! st-test-fail (+ st-test-fail 1)) (append! st-test-fails {:actual actual :expected expected :name name}))))) ;; Strip eof and project to just :type/:value. (define st-toks (fn (src) (map (fn (tok) {:type (get tok :type) :value (get tok :value)}) (filter (fn (tok) (not (= (get tok :type) "eof"))) (st-tokenize src))))) ;; ── 1. Whitespace / empty ── (st-test "empty input" (st-toks "") (list)) (st-test "all whitespace" (st-toks " \t\n ") (list)) ;; ── 2. Identifiers ── (st-test "lowercase ident" (st-toks "foo") (list {:type "ident" :value "foo"})) (st-test "capitalised ident" (st-toks "Foo") (list {:type "ident" :value "Foo"})) (st-test "underscore ident" (st-toks "_x") (list {:type "ident" :value "_x"})) (st-test "digits in ident" (st-toks "foo123") (list {:type "ident" :value "foo123"})) (st-test "two idents separated" (st-toks "foo bar") (list {:type "ident" :value "foo"} {:type "ident" :value "bar"})) ;; ── 3. Keyword selectors ── (st-test "keyword selector" (st-toks "foo:") (list {:type "keyword" :value "foo:"})) (st-test "keyword call" (st-toks "x at: 1") (list {:type "ident" :value "x"} {:type "keyword" :value "at:"} {:type "number" :value 1})) (st-test "two-keyword chain stays separate" (st-toks "at: 1 put: 2") (list {:type "keyword" :value "at:"} {:type "number" :value 1} {:type "keyword" :value "put:"} {:type "number" :value 2})) (st-test "ident then assign — not a keyword" (st-toks "x := 1") (list {:type "ident" :value "x"} {:type "assign" :value ":="} {:type "number" :value 1})) ;; ── 4. Numbers ── (st-test "integer" (st-toks "42") (list {:type "number" :value 42})) (st-test "float" (st-toks "3.14") (list {:type "number" :value 3.14})) (st-test "hex radix" (st-toks "16rFF") (list {:type "number" :value {:radix 16 :digits "FF" :value 255 :kind "radix"}})) (st-test "binary radix" (st-toks "2r1011") (list {:type "number" :value {:radix 2 :digits "1011" :value 11 :kind "radix"}})) (st-test "exponent" (st-toks "1e3") (list {:type "number" :value 1000})) (st-test "negative exponent (parser handles minus)" (st-toks "1.5e-2") (list {:type "number" :value 0.015})) ;; ── 5. Strings ── (st-test "simple string" (st-toks "'hi'") (list {:type "string" :value "hi"})) (st-test "empty string" (st-toks "''") (list {:type "string" :value ""})) (st-test "doubled-quote escape" (st-toks "'a''b'") (list {:type "string" :value "a'b"})) ;; ── 6. Characters ── (st-test "char literal letter" (st-toks "$a") (list {:type "char" :value "a"})) (st-test "char literal punct" (st-toks "$$") (list {:type "char" :value "$"})) (st-test "char literal space" (st-toks "$ ") (list {:type "char" :value " "})) ;; ── 7. Symbols ── (st-test "symbol ident" (st-toks "#foo") (list {:type "symbol" :value "foo"})) (st-test "symbol binary" (st-toks "#+") (list {:type "symbol" :value "+"})) (st-test "symbol arrow" (st-toks "#->") (list {:type "symbol" :value "->"})) (st-test "symbol keyword chain" (st-toks "#at:put:") (list {:type "symbol" :value "at:put:"})) (st-test "quoted symbol with spaces" (st-toks "#'foo bar'") (list {:type "symbol" :value "foo bar"})) ;; ── 8. Literal arrays / byte arrays ── (st-test "literal array open" (st-toks "#(1 2)") (list {:type "array-open" :value "#("} {:type "number" :value 1} {:type "number" :value 2} {:type "rparen" :value ")"})) (st-test "byte array open" (st-toks "#[1 2 3]") (list {:type "byte-array-open" :value "#["} {:type "number" :value 1} {:type "number" :value 2} {:type "number" :value 3} {:type "rbracket" :value "]"})) ;; ── 9. Binary selectors ── (st-test "plus" (st-toks "+") (list {:type "binary" :value "+"})) (st-test "minus" (st-toks "-") (list {:type "binary" :value "-"})) (st-test "star" (st-toks "*") (list {:type "binary" :value "*"})) (st-test "double-equal" (st-toks "==") (list {:type "binary" :value "=="})) (st-test "leq" (st-toks "<=") (list {:type "binary" :value "<="})) (st-test "geq" (st-toks ">=") (list {:type "binary" :value ">="})) (st-test "neq" (st-toks "~=") (list {:type "binary" :value "~="})) (st-test "arrow" (st-toks "->") (list {:type "binary" :value "->"})) (st-test "comma" (st-toks ",") (list {:type "binary" :value ","})) (st-test "binary in expression" (st-toks "a + b") (list {:type "ident" :value "a"} {:type "binary" :value "+"} {:type "ident" :value "b"})) ;; ── 10. Punctuation ── (st-test "lparen" (st-toks "(") (list {:type "lparen" :value "("})) (st-test "rparen" (st-toks ")") (list {:type "rparen" :value ")"})) (st-test "lbracket" (st-toks "[") (list {:type "lbracket" :value "["})) (st-test "rbracket" (st-toks "]") (list {:type "rbracket" :value "]"})) (st-test "lbrace" (st-toks "{") (list {:type "lbrace" :value "{"})) (st-test "rbrace" (st-toks "}") (list {:type "rbrace" :value "}"})) (st-test "period" (st-toks ".") (list {:type "period" :value "."})) (st-test "semi" (st-toks ";") (list {:type "semi" :value ";"})) (st-test "bar" (st-toks "|") (list {:type "bar" :value "|"})) (st-test "caret" (st-toks "^") (list {:type "caret" :value "^"})) (st-test "bang" (st-toks "!") (list {:type "bang" :value "!"})) (st-test "colon" (st-toks ":") (list {:type "colon" :value ":"})) (st-test "assign" (st-toks ":=") (list {:type "assign" :value ":="})) ;; ── 11. Comments ── (st-test "comment skipped" (st-toks "\"hello\"") (list)) (st-test "comment between tokens" (st-toks "a \"comment\" b") (list {:type "ident" :value "a"} {:type "ident" :value "b"})) (st-test "multi-line comment" (st-toks "\"line1\nline2\"42") (list {:type "number" :value 42})) ;; ── 12. Compound expressions ── (st-test "block with params" (st-toks "[:a :b | a + b]") (list {:type "lbracket" :value "["} {:type "colon" :value ":"} {:type "ident" :value "a"} {:type "colon" :value ":"} {:type "ident" :value "b"} {:type "bar" :value "|"} {:type "ident" :value "a"} {:type "binary" :value "+"} {:type "ident" :value "b"} {:type "rbracket" :value "]"})) (st-test "cascade" (st-toks "x m1; m2") (list {:type "ident" :value "x"} {:type "ident" :value "m1"} {:type "semi" :value ";"} {:type "ident" :value "m2"})) (st-test "method body return" (st-toks "^ self foo") (list {:type "caret" :value "^"} {:type "ident" :value "self"} {:type "ident" :value "foo"})) (st-test "class declaration head" (st-toks "Object subclass: #Foo") (list {:type "ident" :value "Object"} {:type "keyword" :value "subclass:"} {:type "symbol" :value "Foo"})) (st-test "temp declaration" (st-toks "| t1 t2 |") (list {:type "bar" :value "|"} {:type "ident" :value "t1"} {:type "ident" :value "t2"} {:type "bar" :value "|"})) (st-test "chunk separator" (st-toks "Foo bar !") (list {:type "ident" :value "Foo"} {:type "ident" :value "bar"} {:type "bang" :value "!"})) (st-test "keyword call with binary precedence" (st-toks "x foo: 1 + 2") (list {:type "ident" :value "x"} {:type "keyword" :value "foo:"} {:type "number" :value 1} {:type "binary" :value "+"} {:type "number" :value 2})) (list st-test-pass st-test-fail)