Files
rose-ash/lib/apl/tokenizer.sx
giles 67ff2a3ae8
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 1m4s
apl: idiom corpus 34→64 + fix ≢/≡ glyph recognition (+30 tests, 445/445)
30 new source-string idioms via apl-run: triangulars, factorial,
running sum/product, parity counts, identity matrix, mult-table,
dot product, ∧.= equality, take/drop/reverse, tally, ravel,
count-of-value, etc.

Side-fix: tokenizer's apl-glyph-set was missing ≢ and ≡ — they
were silently skipped.  Added them and to apl-parse-fn-glyphs.
2026-05-07 14:20:42 +00:00

169 lines
5.7 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
(define apl-glyph-set
(list "+" "-" "×" "÷" "*" "⍟" "⌈" "⌊" "|" "!" "?" "○" "~" "<" "≤" "=" "≥" ">" "≠"
"≢" "≡" "∊" "∧" "" "⍱" "⍲" "," "⍪" "" "⌽" "⊖" "⍉" "↑" "↓" "⊂" "⊃" "⊆"
"" "∩" "" "⍸" "⌷" "⍋" "⍒" "⊥" "" "⊣" "⊢" "⍎" "⍕"
"" "⍵" "∇" "/" "\\" "¨" "⍨" "∘" "." "⍣" "⍤" "⍥" "@" "¯"))
(define apl-glyph?
(fn (ch)
(some (fn (g) (= g ch)) apl-glyph-set)))
(define apl-digit?
(fn (ch)
(and (string? ch) (>= ch "0") (<= ch "9"))))
(define apl-alpha?
(fn (ch)
(and (string? ch)
(or (and (>= ch "a") (<= ch "z"))
(and (>= ch "A") (<= ch "Z"))
(= ch "_")))))
(define apl-tokenize
(fn (source)
(let ((pos 0)
(src-len (len source))
(tokens (list)))
(define tok-push!
(fn (type value)
(append! tokens {:type type :value value})))
(define cur-sw?
(fn (ch)
(and (< pos src-len) (starts-with? (slice source pos) ch))))
(define cur-byte
(fn ()
(if (< pos src-len) (nth source pos) nil)))
(define advance!
(fn ()
(set! pos (+ pos 1))))
(define consume!
(fn (ch)
(set! pos (+ pos (len ch)))))
(define find-glyph
(fn ()
(let ((rem (slice source pos)))
(let ((matches (filter (fn (g) (starts-with? rem g)) apl-glyph-set)))
(if (> (len matches) 0) (first matches) nil)))))
(define read-digits!
(fn (acc)
(if (and (< pos src-len) (apl-digit? (cur-byte)))
(let ((ch (cur-byte)))
(begin
(advance!)
(read-digits! (str acc ch))))
acc)))
(define read-ident-cont!
(fn ()
(when (and (< pos src-len)
(let ((ch (cur-byte)))
(or (apl-alpha? ch) (apl-digit? ch))))
(begin
(advance!)
(read-ident-cont!)))))
(define read-string!
(fn (acc)
(cond
((>= pos src-len) acc)
((cur-sw? "'")
(if (and (< (+ pos 1) src-len) (cur-sw? "'"))
(begin
(advance!)
(advance!)
(read-string! (str acc "'")))
(begin (advance!) acc)))
(true
(let ((ch (cur-byte)))
(begin
(advance!)
(read-string! (str acc ch))))))))
(define skip-line!
(fn ()
(when (and (< pos src-len) (not (cur-sw? "\n")))
(begin
(advance!)
(skip-line!)))))
(define scan!
(fn ()
(when (< pos src-len)
(let ((ch (cur-byte)))
(cond
((or (= ch " ") (= ch "\t") (= ch "\r"))
(begin (advance!) (scan!)))
((= ch "\n")
(begin (advance!) (tok-push! :newline nil) (scan!)))
((cur-sw? "⍝")
(begin (skip-line!) (scan!)))
((cur-sw? "⋄")
(begin (consume! "⋄") (tok-push! :diamond nil) (scan!)))
((= ch "(")
(begin (advance!) (tok-push! :lparen nil) (scan!)))
((= ch ")")
(begin (advance!) (tok-push! :rparen nil) (scan!)))
((= ch "[")
(begin (advance!) (tok-push! :lbracket nil) (scan!)))
((= ch "]")
(begin (advance!) (tok-push! :rbracket nil) (scan!)))
((= ch "{")
(begin (advance!) (tok-push! :lbrace nil) (scan!)))
((= ch "}")
(begin (advance!) (tok-push! :rbrace nil) (scan!)))
((= ch ";")
(begin (advance!) (tok-push! :semi nil) (scan!)))
((cur-sw? "←")
(begin (consume! "←") (tok-push! :assign nil) (scan!)))
((= ch ":")
(let ((start pos))
(begin
(advance!)
(if (and (< pos src-len) (apl-alpha? (cur-byte)))
(begin
(read-ident-cont!)
(tok-push! :keyword (slice source start pos)))
(tok-push! :colon nil))
(scan!))))
((and (cur-sw? "¯")
(< (+ pos (len "¯")) src-len)
(apl-digit? (nth source (+ pos (len "¯")))))
(begin
(consume! "¯")
(let ((digits (read-digits! "")))
(tok-push! :num (- 0 (parse-int digits 0))))
(scan!)))
((apl-digit? ch)
(begin
(let ((digits (read-digits! "")))
(tok-push! :num (parse-int digits 0)))
(scan!)))
((= ch "'")
(begin
(advance!)
(let ((s (read-string! "")))
(tok-push! :str s))
(scan!)))
((or (apl-alpha? ch) (cur-sw? "⎕"))
(let ((start pos))
(begin
(if (cur-sw? "⎕") (consume! "⎕") (advance!))
(read-ident-cont!)
(tok-push! :name (slice source start pos))
(scan!))))
(true
(let ((g (find-glyph)))
(if g
(begin (consume! g) (tok-push! :glyph g) (scan!))
(begin (advance!) (scan!))))))))))
(scan!)
tokens)))