Files
rose-ash/lib/apl/tokenizer.sx
giles a677585639
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 53s
apl: programs-e2e + ⌿/⍀ glyph fix (+15 tests, 490/490)
programs-e2e.sx exercises the classic-algorithm shapes from
lib/apl/tests/programs/*.apl via the full pipeline (apl-run on
embedded source strings).  Tests include factorial-via-∇,
triangular numbers, sum-of-squares, prime-mask building blocks
(divisor counts via outer mod), named-fn composition,
dyadic max-of-two, and a single Newton sqrt step.

The original one-liners (e.g. primes' inline ⍵←⍳⍵) need parser
features we haven't built (compress-as-fn, inline assign) — the
e2e tests use multi-statement equivalents.  No file-reading
primitive in OCaml SX, so source is embedded.

Side-fix: ⌿ (first-axis reduce) and ⍀ (first-axis scan) were
silently skipped by the tokenizer — added to apl-glyph-set
and apl-parse-op-glyphs.
2026-05-07 18:31:57 +00:00

181 lines
6.5 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
(define apl-glyph-set
(list "+" "-" "×" "÷" "*" "⍟" "⌈" "⌊" "|" "!" "?" "○" "~" "<" "≤" "=" "≥" ">" "≠"
"≢" "≡" "∊" "∧" "" "⍱" "⍲" "," "⍪" "" "⌽" "⊖" "⍉" "↑" "↓" "⊂" "⊃" "⊆"
"" "∩" "" "⍸" "⌷" "⍋" "⍒" "⊥" "" "⊣" "⊢" "⍎" "⍕"
"" "⍵" "∇" "/" "⌿" "\\" "⍀" "¨" "⍨" "∘" "." "⍣" "⍤" "⍥" "@" "¯"))
(define apl-glyph?
(fn (ch)
(some (fn (g) (= g ch)) apl-glyph-set)))
(define apl-digit?
(fn (ch)
(and (string? ch) (>= ch "0") (<= ch "9"))))
(define apl-alpha?
(fn (ch)
(and (string? ch)
(or (and (>= ch "a") (<= ch "z"))
(and (>= ch "A") (<= ch "Z"))
(= ch "_")))))
(define apl-tokenize
(fn (source)
(let ((pos 0)
(src-len (len source))
(tokens (list)))
(define tok-push!
(fn (type value)
(append! tokens {:type type :value value})))
(define cur-sw?
(fn (ch)
(and (< pos src-len) (starts-with? (slice source pos) ch))))
(define cur-byte
(fn ()
(if (< pos src-len) (nth source pos) nil)))
(define advance!
(fn ()
(set! pos (+ pos 1))))
(define consume!
(fn (ch)
(set! pos (+ pos (len ch)))))
(define find-glyph
(fn ()
(let ((rem (slice source pos)))
(let ((matches (filter (fn (g) (starts-with? rem g)) apl-glyph-set)))
(if (> (len matches) 0) (first matches) nil)))))
(define read-digits!
(fn (acc)
(if (and (< pos src-len) (apl-digit? (cur-byte)))
(let ((ch (cur-byte)))
(begin
(advance!)
(read-digits! (str acc ch))))
acc)))
(define read-ident-cont!
(fn ()
(when (and (< pos src-len)
(let ((ch (cur-byte)))
(or (apl-alpha? ch) (apl-digit? ch))))
(begin
(advance!)
(read-ident-cont!)))))
(define read-string!
(fn (acc)
(cond
((>= pos src-len) acc)
((cur-sw? "'")
(if (and (< (+ pos 1) src-len) (cur-sw? "'"))
(begin
(advance!)
(advance!)
(read-string! (str acc "'")))
(begin (advance!) acc)))
(true
(let ((ch (cur-byte)))
(begin
(advance!)
(read-string! (str acc ch))))))))
(define skip-line!
(fn ()
(when (and (< pos src-len) (not (cur-sw? "\n")))
(begin
(advance!)
(skip-line!)))))
(define scan!
(fn ()
(when (< pos src-len)
(let ((ch (cur-byte)))
(cond
((or (= ch " ") (= ch "\t") (= ch "\r"))
(begin (advance!) (scan!)))
((= ch "\n")
(begin (advance!) (tok-push! :newline nil) (scan!)))
((cur-sw? "⍝")
(begin (skip-line!) (scan!)))
((cur-sw? "⋄")
(begin (consume! "⋄") (tok-push! :diamond nil) (scan!)))
((= ch "(")
(begin (advance!) (tok-push! :lparen nil) (scan!)))
((= ch ")")
(begin (advance!) (tok-push! :rparen nil) (scan!)))
((= ch "[")
(begin (advance!) (tok-push! :lbracket nil) (scan!)))
((= ch "]")
(begin (advance!) (tok-push! :rbracket nil) (scan!)))
((= ch "{")
(begin (advance!) (tok-push! :lbrace nil) (scan!)))
((= ch "}")
(begin (advance!) (tok-push! :rbrace nil) (scan!)))
((= ch ";")
(begin (advance!) (tok-push! :semi nil) (scan!)))
((cur-sw? "←")
(begin (consume! "←") (tok-push! :assign nil) (scan!)))
((= ch ":")
(let ((start pos))
(begin
(advance!)
(if (and (< pos src-len) (apl-alpha? (cur-byte)))
(begin
(read-ident-cont!)
(tok-push! :keyword (slice source start pos)))
(tok-push! :colon nil))
(scan!))))
((and (cur-sw? "¯")
(< (+ pos (len "¯")) src-len)
(apl-digit? (nth source (+ pos (len "¯")))))
(begin
(consume! "¯")
(let ((digits (read-digits! "")))
(if (and (< pos src-len) (= (cur-byte) ".")
(< (+ pos 1) src-len) (apl-digit? (nth source (+ pos 1))))
(begin (advance!)
(let ((frac (read-digits! "")))
(tok-push! :num (- 0 (string->number (str digits "." frac))))))
(tok-push! :num (- 0 (parse-int digits 0)))))
(scan!)))
((apl-digit? ch)
(begin
(let ((digits (read-digits! "")))
(if (and (< pos src-len) (= (cur-byte) ".")
(< (+ pos 1) src-len) (apl-digit? (nth source (+ pos 1))))
(begin (advance!)
(let ((frac (read-digits! "")))
(tok-push! :num (string->number (str digits "." frac)))))
(tok-push! :num (parse-int digits 0))))
(scan!)))
((= ch "'")
(begin
(advance!)
(let ((s (read-string! "")))
(tok-push! :str s))
(scan!)))
((or (apl-alpha? ch) (cur-sw? "⎕"))
(let ((start pos))
(begin
(if (cur-sw? "⎕") (consume! "⎕") (advance!))
(if (and (< pos src-len) (cur-sw? "←"))
(consume! "←")
(read-ident-cont!))
(tok-push! :name (slice source start pos))
(scan!))))
(true
(let ((g (find-glyph)))
(if g
(begin (consume! g) (tok-push! :glyph g) (scan!))
(begin (advance!) (scan!))))))))))
(scan!)
tokens)))