apl: tokenizer + 46 tests (Phase 1, step 1)

Unicode-aware byte scanner using starts-with?/consume! for multi-byte
APL glyphs. Handles numbers (¯-negative), string literals, identifiers
(⎕ system names), all APL function/operator glyphs, :Keywords,
comments ⍝, diamond ⋄, assignment ←.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-25 18:22:30 +00:00
parent 3316d402fd
commit c73b696494
2 changed files with 251 additions and 0 deletions

83
lib/apl/tests/parse.sx Normal file
View File

@@ -0,0 +1,83 @@
(define apl-test-count 0)
(define apl-test-pass 0)
(define apl-test-fails (list))
(define apl-test
(fn (name actual expected)
(begin
(set! apl-test-count (+ apl-test-count 1))
(if (= actual expected)
(set! apl-test-pass (+ apl-test-pass 1))
(append! apl-test-fails {:name name :actual actual :expected expected})))))
(define tok-types
(fn (src)
(map (fn (t) (get t :type)) (apl-tokenize src))))
(define tok-values
(fn (src)
(map (fn (t) (get t :value)) (apl-tokenize src))))
(define tok-count
(fn (src)
(len (apl-tokenize src))))
(define tok-type-at
(fn (src i)
(get (nth (apl-tokenize src) i) :type)))
(define tok-value-at
(fn (src i)
(get (nth (apl-tokenize src) i) :value)))
(apl-test "empty: no tokens" (tok-count "") 0)
(apl-test "empty: whitespace only" (tok-count " ") 0)
(apl-test "num: zero" (tok-values "0") (list 0))
(apl-test "num: positive" (tok-values "42") (list 42))
(apl-test "num: large" (tok-values "12345") (list 12345))
(apl-test "num: negative" (tok-values "¯5") (list -5))
(apl-test "num: negative zero" (tok-values "¯0") (list 0))
(apl-test "num: strand count" (tok-count "1 2 3") 3)
(apl-test "num: strand types" (tok-types "1 2 3") (list :num :num :num))
(apl-test "num: strand values" (tok-values "1 2 3") (list 1 2 3))
(apl-test "num: neg in strand" (tok-values "1 ¯2 3") (list 1 -2 3))
(apl-test "str: empty" (tok-values "''") (list ""))
(apl-test "str: single char" (tok-values "'a'") (list "a"))
(apl-test "str: word" (tok-values "'hello'") (list "hello"))
(apl-test "str: escaped quote" (tok-values "''''") (list "'"))
(apl-test "str: type" (tok-types "'abc'") (list :str))
(apl-test "name: simple" (tok-values "foo") (list "foo"))
(apl-test "name: type" (tok-types "foo") (list :name))
(apl-test "name: mixed case" (tok-values "MyVar") (list "MyVar"))
(apl-test "name: with digits" (tok-values "x1") (list "x1"))
(apl-test "name: system var" (tok-values "⎕IO") (list "⎕IO"))
(apl-test "name: system var type" (tok-types "⎕IO") (list :name))
(apl-test "glyph: plus" (tok-types "+") (list :glyph))
(apl-test "glyph: plus value" (tok-values "+") (list "+"))
(apl-test "glyph: iota" (tok-values "") (list ""))
(apl-test "glyph: reduce" (tok-values "+/") (list "+" "/"))
(apl-test "glyph: floor" (tok-values "⌊") (list "⌊"))
(apl-test "glyph: rho" (tok-values "") (list ""))
(apl-test "glyph: alpha omega" (tok-types " ⍵") (list :glyph :glyph))
(apl-test "punct: lparen" (tok-types "(") (list :lparen))
(apl-test "punct: rparen" (tok-types ")") (list :rparen))
(apl-test "punct: brackets" (tok-types "[42]") (list :lbracket :num :rbracket))
(apl-test "punct: braces" (tok-types "{}") (list :lbrace :rbrace))
(apl-test "punct: semi" (tok-types ";") (list :semi))
(apl-test "assign: arrow" (tok-types "x←1") (list :name :assign :num))
(apl-test "diamond: separator" (tok-types "1⋄2") (list :num :diamond :num))
(apl-test "newline: emitted" (tok-types "1\n2") (list :num :newline :num))
(apl-test "comment: skipped" (tok-count "⍝ ignore me") 0)
(apl-test "comment: rest ignored" (tok-count "1 ⍝ note") 1)
(apl-test "colon: bare" (tok-types ":") (list :colon))
(apl-test "keyword: If" (tok-values ":If") (list ":If"))
(apl-test "keyword: type" (tok-types ":While") (list :keyword))
(apl-test "keyword: EndFor" (tok-values ":EndFor") (list ":EndFor"))
(apl-test "expr: +/ 5" (tok-types "+/ 5") (list :glyph :glyph :glyph :num))
(apl-test "expr: x←42" (tok-count "x←42") 3)
(apl-test "expr: dfn body" (tok-types "{+⍵}")
(list :lbrace :glyph :glyph :glyph :rbrace))
(define apl-tokenize-test-summary
(str "tokenizer " apl-test-pass "/" apl-test-count
(if (= (len apl-test-fails) 0) "" (str " FAILS: " apl-test-fails))))