;; Unicode-aware byte scanner using starts-with?/consume! for multi-byte APL glyphs.
;; Handles numbers (¯-negative), string literals, identifiers (⎕ system names),
;; all APL function/operator glyphs, :Keywords, comments (⍝), diamond (⋄),
;; and assignment (←).
;;
;; Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
;;
;; 84 lines · 4.4 KiB · Plaintext
;; ---- Mutable test-run state -------------------------------------------
;; These three globals are updated by `apl-test` on every call.

;; Number of test cases executed so far.
(define apl-test-count 0)

;; Number of test cases whose actual value matched the expected value.
(define apl-test-pass 0)

;; Failure records; starts out as an empty list and is grown with append!.
(define apl-test-fails (list))
|
||
|
||
;; Run one test case and record its outcome.
;;
;;   name     - label stored in the failure record when the case fails
;;   actual   - value produced by the code under test
;;   expected - value `actual` is compared against with `=`
;;
;; Always bumps apl-test-count. On a match it bumps apl-test-pass; on a
;; mismatch it appends a {:name :actual :expected} record to apl-test-fails.
(define apl-test
  (fn (name actual expected)
    (begin
      (set! apl-test-count (+ 1 apl-test-count))
      (if (not (= actual expected))
        ;; mismatch: keep enough detail to report the failure later
        (append!
          apl-test-fails
          {:name name :actual actual :expected expected})
        ;; match: count the pass
        (set! apl-test-pass (+ 1 apl-test-pass))))))
|
||
|
||
;; Tokenize `src` with apl-tokenize and return the :type of every token,
;; in token order.
;; NOTE(review): assumes each token supports (get tok :type) — confirm
;; against apl-tokenize, which is defined outside this file section.
(define tok-types
  (fn (src)
    (let ((tokens (apl-tokenize src)))
      (map (fn (tok) (get tok :type)) tokens))))
|
||
|
||
;; Tokenize `src` with apl-tokenize and return the :value of every token,
;; in token order.
;; NOTE(review): assumes each token supports (get tok :value) — confirm
;; against apl-tokenize, which is defined outside this file section.
(define tok-values
  (fn (src)
    (let ((tokens (apl-tokenize src)))
      (map (fn (tok) (get tok :value)) tokens))))
|
||
|
||
;; Return how many tokens apl-tokenize produces for `src`.
(define tok-count
  (fn (src)
    (let ((tokens (apl-tokenize src)))
      (len tokens))))
|
||
|
||
;; Return the :type of the token at position `i` (as indexed by `nth`)
;; in the token list apl-tokenize produces for `src`.
(define tok-type-at
  (fn (src i)
    (let ((tok (nth (apl-tokenize src) i)))
      (get tok :type))))
|
||
|
||
;; Return the :value of the token at position `i` (as indexed by `nth`)
;; in the token list apl-tokenize produces for `src`.
(define tok-value-at
  (fn (src i)
    (let ((tok (nth (apl-tokenize src) i)))
      (get tok :value))))
|
||
|
||
;; ---- Empty input ------------------------------------------------------
(apl-test "empty: no tokens" (tok-count "") 0)
(apl-test "empty: whitespace only" (tok-count " ") 0)

;; ---- Numbers (¯ prefixes a negative literal) --------------------------
(apl-test "num: zero" (tok-values "0") (list 0))
(apl-test "num: positive" (tok-values "42") (list 42))
(apl-test "num: large" (tok-values "12345") (list 12345))
(apl-test "num: negative" (tok-values "¯5") (list -5))
(apl-test "num: negative zero" (tok-values "¯0") (list 0))
(apl-test "num: strand count" (tok-count "1 2 3") 3)
(apl-test "num: strand types" (tok-types "1 2 3") (list :num :num :num))
(apl-test "num: strand values" (tok-values "1 2 3") (list 1 2 3))
(apl-test "num: neg in strand" (tok-values "1 ¯2 3") (list 1 -2 3))

;; ---- String literals ('' inside a literal is one quote) ---------------
(apl-test "str: empty" (tok-values "''") (list ""))
(apl-test "str: single char" (tok-values "'a'") (list "a"))
(apl-test "str: word" (tok-values "'hello'") (list "hello"))
(apl-test "str: escaped quote" (tok-values "''''") (list "'"))
(apl-test "str: type" (tok-types "'abc'") (list :str))

;; ---- Names, including ⎕-prefixed system names -------------------------
(apl-test "name: simple" (tok-values "foo") (list "foo"))
(apl-test "name: type" (tok-types "foo") (list :name))
(apl-test "name: mixed case" (tok-values "MyVar") (list "MyVar"))
(apl-test "name: with digits" (tok-values "x1") (list "x1"))
(apl-test "name: system var" (tok-values "⎕IO") (list "⎕IO"))
(apl-test "name: system var type" (tok-types "⎕IO") (list :name))

;; ---- Function / operator glyphs ---------------------------------------
(apl-test "glyph: plus" (tok-types "+") (list :glyph))
(apl-test "glyph: plus value" (tok-values "+") (list "+"))
(apl-test "glyph: iota" (tok-values "⍳") (list "⍳"))
(apl-test "glyph: reduce" (tok-values "+/") (list "+" "/"))
(apl-test "glyph: floor" (tok-values "⌊") (list "⌊"))
(apl-test "glyph: rho" (tok-values "⍴") (list "⍴"))
(apl-test "glyph: alpha omega" (tok-types "⍺ ⍵") (list :glyph :glyph))

;; ---- Punctuation ------------------------------------------------------
(apl-test "punct: lparen" (tok-types "(") (list :lparen))
(apl-test "punct: rparen" (tok-types ")") (list :rparen))
(apl-test "punct: brackets" (tok-types "[42]") (list :lbracket :num :rbracket))
(apl-test "punct: braces" (tok-types "{}") (list :lbrace :rbrace))
(apl-test "punct: semi" (tok-types ";") (list :semi))

;; ---- Assignment, statement separators, comments -----------------------
(apl-test "assign: arrow" (tok-types "x←1") (list :name :assign :num))
(apl-test "diamond: separator" (tok-types "1⋄2") (list :num :diamond :num))
(apl-test "newline: emitted" (tok-types "1\n2") (list :num :newline :num))
(apl-test "comment: skipped" (tok-count "⍝ ignore me") 0)
(apl-test "comment: rest ignored" (tok-count "1 ⍝ note") 1)

;; ---- Colon and :Keywords ----------------------------------------------
(apl-test "colon: bare" (tok-types ":") (list :colon))
(apl-test "keyword: If" (tok-values ":If") (list ":If"))
(apl-test "keyword: type" (tok-types ":While") (list :keyword))
(apl-test "keyword: EndFor" (tok-values ":EndFor") (list ":EndFor"))

;; ---- Whole expressions ------------------------------------------------
(apl-test "expr: +/ ⍳ 5" (tok-types "+/ ⍳ 5") (list :glyph :glyph :glyph :num))
(apl-test "expr: x←42" (tok-count "x←42") 3)
(apl-test
  "expr: dfn body"
  (tok-types "{⍺+⍵}")
  (list :lbrace :glyph :glyph :glyph :rbrace))
|
||
|
||
;; One-line report built when this definition is evaluated:
;; "tokenizer <passed>/<run>", followed by " FAILS: <failure records>"
;; only when apl-test-fails is non-empty.
(define apl-tokenize-test-summary
  (let ((fail-suffix
          (if (= (len apl-test-fails) 0)
            ""
            (str " FAILS: " apl-test-fails))))
    (str "tokenizer " apl-test-pass "/" apl-test-count fail-suffix)))
|