Files
rose-ash/lib/apl/tokenizer.sx
giles eeb530eb85
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 51s
apl: quicksort.apl runs as-written (+7); Phase 10 complete
Three fixes for Iverson's dfn
{1≥≢⍵:⍵ ⋄ p←⍵⌷⍨?≢⍵ ⋄ (∇⍵⌿⍨⍵<p),(p=⍵)/⍵,∇⍵⌿⍨⍵>p}:

1. parser: standalone op-glyph branch (/ ⌿ \ ⍀) now consumes a
   following ⍨ or ¨ and emits :derived-fn — `⍵⌿⍨⍵<p` parses
   as compress-commute (was previously dropping ⍨)
2. tokenizer: `name←...` (no spaces) now tokenizes as separate
   :name + :assign instead of eating ← into the name. ⎕← still
   stays one token for the output op
3. inline p←⍵⌷⍨?≢⍵ mid-dfn now works via existing :assign-expr

Full suite 585/585. Phase 10 complete (all 7 items ticked).

Remaining gaps for a future phase: heterogeneous-strand inner
product is the only unfinished part — life works after dropping ⊃,
quicksort works directly.
2026-05-08 23:53:49 +00:00

199 lines
7.1 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
(define apl-glyph-set
(list "+" "-" "×" "÷" "*" "⍟" "⌈" "⌊" "|" "!" "?" "○" "~" "<" "≤" "=" "≥" ">" "≠"
"≢" "≡" "∊" "∧" "" "⍱" "⍲" "," "⍪" "" "⌽" "⊖" "⍉" "↑" "↓" "⊂" "⊃" "⊆"
"" "∩" "" "⍸" "⌷" "⍋" "⍒" "⊥" "" "⊣" "⊢" "⍎" "⍕"
"" "⍵" "∇" "/" "⌿" "\\" "⍀" "¨" "⍨" "∘" "." "⍣" "⍤" "⍥" "@" "¯"))
(define apl-glyph?
(fn (ch)
(some (fn (g) (= g ch)) apl-glyph-set)))
(define apl-digit?
(fn (ch)
(and (string? ch) (>= ch "0") (<= ch "9"))))
(define apl-alpha?
(fn (ch)
(and (string? ch)
(or (and (>= ch "a") (<= ch "z"))
(and (>= ch "A") (<= ch "Z"))
(= ch "_")))))
(define
apl-tokenize
(fn
(source)
(let
((pos 0) (src-len (len source)) (tokens (list)))
(define tok-push! (fn (type value) (append! tokens {:value value :type type})))
(define
cur-sw?
(fn
(ch)
(and (< pos src-len) (starts-with? (slice source pos) ch))))
(define cur-byte (fn () (if (< pos src-len) (nth source pos) nil)))
(define advance! (fn () (set! pos (+ pos 1))))
(define consume! (fn (ch) (set! pos (+ pos (len ch)))))
(define
find-glyph
(fn
()
(let
((rem (slice source pos)))
(let
((matches (filter (fn (g) (starts-with? rem g)) apl-glyph-set)))
(if (> (len matches) 0) (first matches) nil)))))
(define
read-digits!
(fn
(acc)
(if
(and (< pos src-len) (apl-digit? (cur-byte)))
(let
((ch (cur-byte)))
(begin (advance!) (read-digits! (str acc ch))))
acc)))
(define
read-ident-cont!
(fn
()
(when
(and
(< pos src-len)
(let
((ch (cur-byte)))
(or (apl-alpha? ch) (apl-digit? ch))))
(begin (advance!) (read-ident-cont!)))))
(define
read-string!
(fn
(acc)
(cond
((>= pos src-len) acc)
((cur-sw? "'")
(if
(and (< (+ pos 1) src-len) (cur-sw? "'"))
(begin (advance!) (advance!) (read-string! (str acc "'")))
(begin (advance!) acc)))
(true
(let
((ch (cur-byte)))
(begin (advance!) (read-string! (str acc ch))))))))
(define
skip-line!
(fn
()
(when
(and (< pos src-len) (not (cur-sw? "\n")))
(begin (advance!) (skip-line!)))))
(define
scan!
(fn
()
(when
(< pos src-len)
(let
((ch (cur-byte)))
(cond
((or (= ch " ") (= ch "\t") (= ch "\r"))
(begin (advance!) (scan!)))
((= ch "\n")
(begin (advance!) (tok-push! :newline nil) (scan!)))
((cur-sw? "⍝") (begin (skip-line!) (scan!)))
((cur-sw? "⋄")
(begin (consume! "⋄") (tok-push! :diamond nil) (scan!)))
((= ch "(")
(begin (advance!) (tok-push! :lparen nil) (scan!)))
((= ch ")")
(begin (advance!) (tok-push! :rparen nil) (scan!)))
((= ch "[")
(begin (advance!) (tok-push! :lbracket nil) (scan!)))
((= ch "]")
(begin (advance!) (tok-push! :rbracket nil) (scan!)))
((= ch "{")
(begin (advance!) (tok-push! :lbrace nil) (scan!)))
((= ch "}")
(begin (advance!) (tok-push! :rbrace nil) (scan!)))
((= ch ";")
(begin (advance!) (tok-push! :semi nil) (scan!)))
((cur-sw? "←")
(begin (consume! "←") (tok-push! :assign nil) (scan!)))
((= ch ":")
(let
((start pos))
(begin
(advance!)
(if
(and (< pos src-len) (apl-alpha? (cur-byte)))
(begin
(read-ident-cont!)
(tok-push! :keyword (slice source start pos)))
(tok-push! :colon nil))
(scan!))))
((and (cur-sw? "¯") (< (+ pos (len "¯")) src-len) (apl-digit? (nth source (+ pos (len "¯")))))
(begin
(consume! "¯")
(let
((digits (read-digits! "")))
(if
(and
(< pos src-len)
(= (cur-byte) ".")
(< (+ pos 1) src-len)
(apl-digit? (nth source (+ pos 1))))
(begin
(advance!)
(let
((frac (read-digits! "")))
(tok-push!
:num (- 0 (string->number (str digits "." frac))))))
(tok-push! :num (- 0 (parse-int digits 0)))))
(scan!)))
((apl-digit? ch)
(begin
(let
((digits (read-digits! "")))
(if
(and
(< pos src-len)
(= (cur-byte) ".")
(< (+ pos 1) src-len)
(apl-digit? (nth source (+ pos 1))))
(begin
(advance!)
(let
((frac (read-digits! "")))
(tok-push!
:num (string->number (str digits "." frac)))))
(tok-push! :num (parse-int digits 0))))
(scan!)))
((= ch "'")
(begin
(advance!)
(let ((s (read-string! ""))) (tok-push! :str s))
(scan!)))
((or (apl-alpha? ch) (cur-sw? "⎕"))
(let
((start pos))
(begin
(if
(cur-sw? "⎕")
(begin
(consume! "⎕")
(if
(and (< pos src-len) (cur-sw? "←"))
(consume! "←")
(read-ident-cont!)))
(begin (advance!) (read-ident-cont!)))
(tok-push! :name (slice source start pos))
(scan!))))
(true
(let
((g (find-glyph)))
(if
g
(begin (consume! g) (tok-push! :glyph g) (scan!))
(begin (advance!) (scan!))))))))))
(scan!)
tokens)))