Compare commits
2 Commits
architectu
...
loops/apl
| Author | SHA1 | Date | |
|---|---|---|---|
| dbba2fe418 | |||
| c73b696494 |
83
lib/apl/tests/parse.sx
Normal file
83
lib/apl/tests/parse.sx
Normal file
@@ -0,0 +1,83 @@
|
|||||||
|
(define apl-test-count 0)
|
||||||
|
(define apl-test-pass 0)
|
||||||
|
(define apl-test-fails (list))
|
||||||
|
|
||||||
|
(define apl-test
|
||||||
|
(fn (name actual expected)
|
||||||
|
(begin
|
||||||
|
(set! apl-test-count (+ apl-test-count 1))
|
||||||
|
(if (= actual expected)
|
||||||
|
(set! apl-test-pass (+ apl-test-pass 1))
|
||||||
|
(append! apl-test-fails {:name name :actual actual :expected expected})))))
|
||||||
|
|
||||||
|
(define tok-types
|
||||||
|
(fn (src)
|
||||||
|
(map (fn (t) (get t :type)) (apl-tokenize src))))
|
||||||
|
|
||||||
|
(define tok-values
|
||||||
|
(fn (src)
|
||||||
|
(map (fn (t) (get t :value)) (apl-tokenize src))))
|
||||||
|
|
||||||
|
(define tok-count
|
||||||
|
(fn (src)
|
||||||
|
(len (apl-tokenize src))))
|
||||||
|
|
||||||
|
(define tok-type-at
|
||||||
|
(fn (src i)
|
||||||
|
(get (nth (apl-tokenize src) i) :type)))
|
||||||
|
|
||||||
|
(define tok-value-at
|
||||||
|
(fn (src i)
|
||||||
|
(get (nth (apl-tokenize src) i) :value)))
|
||||||
|
|
||||||
|
(apl-test "empty: no tokens" (tok-count "") 0)
|
||||||
|
(apl-test "empty: whitespace only" (tok-count " ") 0)
|
||||||
|
(apl-test "num: zero" (tok-values "0") (list 0))
|
||||||
|
(apl-test "num: positive" (tok-values "42") (list 42))
|
||||||
|
(apl-test "num: large" (tok-values "12345") (list 12345))
|
||||||
|
(apl-test "num: negative" (tok-values "¯5") (list -5))
|
||||||
|
(apl-test "num: negative zero" (tok-values "¯0") (list 0))
|
||||||
|
(apl-test "num: strand count" (tok-count "1 2 3") 3)
|
||||||
|
(apl-test "num: strand types" (tok-types "1 2 3") (list :num :num :num))
|
||||||
|
(apl-test "num: strand values" (tok-values "1 2 3") (list 1 2 3))
|
||||||
|
(apl-test "num: neg in strand" (tok-values "1 ¯2 3") (list 1 -2 3))
|
||||||
|
(apl-test "str: empty" (tok-values "''") (list ""))
|
||||||
|
(apl-test "str: single char" (tok-values "'a'") (list "a"))
|
||||||
|
(apl-test "str: word" (tok-values "'hello'") (list "hello"))
|
||||||
|
(apl-test "str: escaped quote" (tok-values "''''") (list "'"))
|
||||||
|
(apl-test "str: type" (tok-types "'abc'") (list :str))
|
||||||
|
(apl-test "name: simple" (tok-values "foo") (list "foo"))
|
||||||
|
(apl-test "name: type" (tok-types "foo") (list :name))
|
||||||
|
(apl-test "name: mixed case" (tok-values "MyVar") (list "MyVar"))
|
||||||
|
(apl-test "name: with digits" (tok-values "x1") (list "x1"))
|
||||||
|
(apl-test "name: system var" (tok-values "⎕IO") (list "⎕IO"))
|
||||||
|
(apl-test "name: system var type" (tok-types "⎕IO") (list :name))
|
||||||
|
(apl-test "glyph: plus" (tok-types "+") (list :glyph))
|
||||||
|
(apl-test "glyph: plus value" (tok-values "+") (list "+"))
|
||||||
|
(apl-test "glyph: iota" (tok-values "⍳") (list "⍳"))
|
||||||
|
(apl-test "glyph: reduce" (tok-values "+/") (list "+" "/"))
|
||||||
|
(apl-test "glyph: floor" (tok-values "⌊") (list "⌊"))
|
||||||
|
(apl-test "glyph: rho" (tok-values "⍴") (list "⍴"))
|
||||||
|
(apl-test "glyph: alpha omega" (tok-types "⍺ ⍵") (list :glyph :glyph))
|
||||||
|
(apl-test "punct: lparen" (tok-types "(") (list :lparen))
|
||||||
|
(apl-test "punct: rparen" (tok-types ")") (list :rparen))
|
||||||
|
(apl-test "punct: brackets" (tok-types "[42]") (list :lbracket :num :rbracket))
|
||||||
|
(apl-test "punct: braces" (tok-types "{}") (list :lbrace :rbrace))
|
||||||
|
(apl-test "punct: semi" (tok-types ";") (list :semi))
|
||||||
|
(apl-test "assign: arrow" (tok-types "x←1") (list :name :assign :num))
|
||||||
|
(apl-test "diamond: separator" (tok-types "1⋄2") (list :num :diamond :num))
|
||||||
|
(apl-test "newline: emitted" (tok-types "1\n2") (list :num :newline :num))
|
||||||
|
(apl-test "comment: skipped" (tok-count "⍝ ignore me") 0)
|
||||||
|
(apl-test "comment: rest ignored" (tok-count "1 ⍝ note") 1)
|
||||||
|
(apl-test "colon: bare" (tok-types ":") (list :colon))
|
||||||
|
(apl-test "keyword: If" (tok-values ":If") (list ":If"))
|
||||||
|
(apl-test "keyword: type" (tok-types ":While") (list :keyword))
|
||||||
|
(apl-test "keyword: EndFor" (tok-values ":EndFor") (list ":EndFor"))
|
||||||
|
(apl-test "expr: +/ ⍳ 5" (tok-types "+/ ⍳ 5") (list :glyph :glyph :glyph :num))
|
||||||
|
(apl-test "expr: x←42" (tok-count "x←42") 3)
|
||||||
|
(apl-test "expr: dfn body" (tok-types "{⍺+⍵}")
|
||||||
|
(list :lbrace :glyph :glyph :glyph :rbrace))
|
||||||
|
|
||||||
|
(define apl-tokenize-test-summary
|
||||||
|
(str "tokenizer " apl-test-pass "/" apl-test-count
|
||||||
|
(if (= (len apl-test-fails) 0) "" (str " FAILS: " apl-test-fails))))
|
||||||
168
lib/apl/tokenizer.sx
Normal file
168
lib/apl/tokenizer.sx
Normal file
@@ -0,0 +1,168 @@
|
|||||||
|
(define apl-glyph-set
|
||||||
|
(list "+" "-" "×" "÷" "*" "⍟" "⌈" "⌊" "|" "!" "?" "○" "~" "<" "≤" "=" "≥" ">" "≠"
|
||||||
|
"∊" "∧" "∨" "⍱" "⍲" "," "⍪" "⍴" "⌽" "⊖" "⍉" "↑" "↓" "⊂" "⊃" "⊆"
|
||||||
|
"∪" "∩" "⍳" "⍸" "⌷" "⍋" "⍒" "⊥" "⊤" "⊣" "⊢" "⍎" "⍕"
|
||||||
|
"⍺" "⍵" "∇" "/" "\\" "¨" "⍨" "∘" "." "⍣" "⍤" "⍥" "@" "¯"))
|
||||||
|
|
||||||
|
(define apl-glyph?
|
||||||
|
(fn (ch)
|
||||||
|
(some (fn (g) (= g ch)) apl-glyph-set)))
|
||||||
|
|
||||||
|
(define apl-digit?
|
||||||
|
(fn (ch)
|
||||||
|
(and (string? ch) (>= ch "0") (<= ch "9"))))
|
||||||
|
|
||||||
|
(define apl-alpha?
|
||||||
|
(fn (ch)
|
||||||
|
(and (string? ch)
|
||||||
|
(or (and (>= ch "a") (<= ch "z"))
|
||||||
|
(and (>= ch "A") (<= ch "Z"))
|
||||||
|
(= ch "_")))))
|
||||||
|
|
||||||
|
(define apl-tokenize
|
||||||
|
(fn (source)
|
||||||
|
(let ((pos 0)
|
||||||
|
(src-len (len source))
|
||||||
|
(tokens (list)))
|
||||||
|
|
||||||
|
(define tok-push!
|
||||||
|
(fn (type value)
|
||||||
|
(append! tokens {:type type :value value})))
|
||||||
|
|
||||||
|
(define cur-sw?
|
||||||
|
(fn (ch)
|
||||||
|
(and (< pos src-len) (starts-with? (slice source pos) ch))))
|
||||||
|
|
||||||
|
(define cur-byte
|
||||||
|
(fn ()
|
||||||
|
(if (< pos src-len) (nth source pos) nil)))
|
||||||
|
|
||||||
|
(define advance!
|
||||||
|
(fn ()
|
||||||
|
(set! pos (+ pos 1))))
|
||||||
|
|
||||||
|
(define consume!
|
||||||
|
(fn (ch)
|
||||||
|
(set! pos (+ pos (len ch)))))
|
||||||
|
|
||||||
|
(define find-glyph
|
||||||
|
(fn ()
|
||||||
|
(let ((rem (slice source pos)))
|
||||||
|
(let ((matches (filter (fn (g) (starts-with? rem g)) apl-glyph-set)))
|
||||||
|
(if (> (len matches) 0) (first matches) nil)))))
|
||||||
|
|
||||||
|
(define read-digits!
|
||||||
|
(fn (acc)
|
||||||
|
(if (and (< pos src-len) (apl-digit? (cur-byte)))
|
||||||
|
(let ((ch (cur-byte)))
|
||||||
|
(begin
|
||||||
|
(advance!)
|
||||||
|
(read-digits! (str acc ch))))
|
||||||
|
acc)))
|
||||||
|
|
||||||
|
(define read-ident-cont!
|
||||||
|
(fn ()
|
||||||
|
(when (and (< pos src-len)
|
||||||
|
(let ((ch (cur-byte)))
|
||||||
|
(or (apl-alpha? ch) (apl-digit? ch))))
|
||||||
|
(begin
|
||||||
|
(advance!)
|
||||||
|
(read-ident-cont!)))))
|
||||||
|
|
||||||
|
(define read-string!
|
||||||
|
(fn (acc)
|
||||||
|
(cond
|
||||||
|
((>= pos src-len) acc)
|
||||||
|
((cur-sw? "'")
|
||||||
|
(if (and (< (+ pos 1) src-len) (cur-sw? "'"))
|
||||||
|
(begin
|
||||||
|
(advance!)
|
||||||
|
(advance!)
|
||||||
|
(read-string! (str acc "'")))
|
||||||
|
(begin (advance!) acc)))
|
||||||
|
(true
|
||||||
|
(let ((ch (cur-byte)))
|
||||||
|
(begin
|
||||||
|
(advance!)
|
||||||
|
(read-string! (str acc ch))))))))
|
||||||
|
|
||||||
|
(define skip-line!
|
||||||
|
(fn ()
|
||||||
|
(when (and (< pos src-len) (not (cur-sw? "\n")))
|
||||||
|
(begin
|
||||||
|
(advance!)
|
||||||
|
(skip-line!)))))
|
||||||
|
|
||||||
|
(define scan!
|
||||||
|
(fn ()
|
||||||
|
(when (< pos src-len)
|
||||||
|
(let ((ch (cur-byte)))
|
||||||
|
(cond
|
||||||
|
((or (= ch " ") (= ch "\t") (= ch "\r"))
|
||||||
|
(begin (advance!) (scan!)))
|
||||||
|
((= ch "\n")
|
||||||
|
(begin (advance!) (tok-push! :newline nil) (scan!)))
|
||||||
|
((cur-sw? "⍝")
|
||||||
|
(begin (skip-line!) (scan!)))
|
||||||
|
((cur-sw? "⋄")
|
||||||
|
(begin (consume! "⋄") (tok-push! :diamond nil) (scan!)))
|
||||||
|
((= ch "(")
|
||||||
|
(begin (advance!) (tok-push! :lparen nil) (scan!)))
|
||||||
|
((= ch ")")
|
||||||
|
(begin (advance!) (tok-push! :rparen nil) (scan!)))
|
||||||
|
((= ch "[")
|
||||||
|
(begin (advance!) (tok-push! :lbracket nil) (scan!)))
|
||||||
|
((= ch "]")
|
||||||
|
(begin (advance!) (tok-push! :rbracket nil) (scan!)))
|
||||||
|
((= ch "{")
|
||||||
|
(begin (advance!) (tok-push! :lbrace nil) (scan!)))
|
||||||
|
((= ch "}")
|
||||||
|
(begin (advance!) (tok-push! :rbrace nil) (scan!)))
|
||||||
|
((= ch ";")
|
||||||
|
(begin (advance!) (tok-push! :semi nil) (scan!)))
|
||||||
|
((cur-sw? "←")
|
||||||
|
(begin (consume! "←") (tok-push! :assign nil) (scan!)))
|
||||||
|
((= ch ":")
|
||||||
|
(let ((start pos))
|
||||||
|
(begin
|
||||||
|
(advance!)
|
||||||
|
(if (and (< pos src-len) (apl-alpha? (cur-byte)))
|
||||||
|
(begin
|
||||||
|
(read-ident-cont!)
|
||||||
|
(tok-push! :keyword (slice source start pos)))
|
||||||
|
(tok-push! :colon nil))
|
||||||
|
(scan!))))
|
||||||
|
((and (cur-sw? "¯")
|
||||||
|
(< (+ pos (len "¯")) src-len)
|
||||||
|
(apl-digit? (nth source (+ pos (len "¯")))))
|
||||||
|
(begin
|
||||||
|
(consume! "¯")
|
||||||
|
(let ((digits (read-digits! "")))
|
||||||
|
(tok-push! :num (- 0 (parse-int digits 0))))
|
||||||
|
(scan!)))
|
||||||
|
((apl-digit? ch)
|
||||||
|
(begin
|
||||||
|
(let ((digits (read-digits! "")))
|
||||||
|
(tok-push! :num (parse-int digits 0)))
|
||||||
|
(scan!)))
|
||||||
|
((= ch "'")
|
||||||
|
(begin
|
||||||
|
(advance!)
|
||||||
|
(let ((s (read-string! "")))
|
||||||
|
(tok-push! :str s))
|
||||||
|
(scan!)))
|
||||||
|
((or (apl-alpha? ch) (cur-sw? "⎕"))
|
||||||
|
(let ((start pos))
|
||||||
|
(begin
|
||||||
|
(if (cur-sw? "⎕") (consume! "⎕") (advance!))
|
||||||
|
(read-ident-cont!)
|
||||||
|
(tok-push! :name (slice source start pos))
|
||||||
|
(scan!))))
|
||||||
|
(true
|
||||||
|
(let ((g (find-glyph)))
|
||||||
|
(if g
|
||||||
|
(begin (consume! g) (tok-push! :glyph g) (scan!))
|
||||||
|
(begin (advance!) (scan!))))))))))
|
||||||
|
|
||||||
|
(scan!)
|
||||||
|
tokens)))
|
||||||
@@ -48,7 +48,7 @@ Core mapping:
|
|||||||
## Roadmap
|
## Roadmap
|
||||||
|
|
||||||
### Phase 1 — tokenizer + parser
|
### Phase 1 — tokenizer + parser
|
||||||
- [ ] Tokenizer: Unicode glyphs (the full APL set: `+ - × ÷ * ⍟ ⌈ ⌊ | ! ? ○ ~ < ≤ = ≥ > ≠ ∊ ∧ ∨ ⍱ ⍲ , ⍪ ⍴ ⌽ ⊖ ⍉ ↑ ↓ ⊂ ⊃ ⊆ ∪ ∩ ⍳ ⍸ ⌷ ⍋ ⍒ ⊥ ⊤ ⊣ ⊢ ⍎ ⍕ ⍝`), operators (`/ \ ¨ ⍨ ∘ . ⍣ ⍤ ⍥ @`), numbers (`¯` for negative, `1E2`, `1J2` complex deferred), characters (`'a'`, `''` escape), strands (juxtaposition of literals: `1 2 3`), names, comments `⍝ …`
|
- [x] Tokenizer: Unicode glyphs (the full APL set: `+ - × ÷ * ⍟ ⌈ ⌊ | ! ? ○ ~ < ≤ = ≥ > ≠ ∊ ∧ ∨ ⍱ ⍲ , ⍪ ⍴ ⌽ ⊖ ⍉ ↑ ↓ ⊂ ⊃ ⊆ ∪ ∩ ⍳ ⍸ ⌷ ⍋ ⍒ ⊥ ⊤ ⊣ ⊢ ⍎ ⍕ ⍝`), operators (`/ \ ¨ ⍨ ∘ . ⍣ ⍤ ⍥ @`), numbers (`¯` for negative, `1E2`, `1J2` complex deferred), characters (`'a'`, `''` escape), strands (juxtaposition of literals: `1 2 3`), names, comments `⍝ …`
|
||||||
- [ ] Parser: right-to-left; classify each token as function, operator, value, or name; resolve valence positionally; dfn `{…}` body, tradfn `∇` header, guards `:`, control words `:If :While :For …` (Dyalog-style)
|
- [ ] Parser: right-to-left; classify each token as function, operator, value, or name; resolve valence positionally; dfn `{…}` body, tradfn `∇` header, guards `:`, control words `:If :While :For …` (Dyalog-style)
|
||||||
- [ ] Unit tests in `lib/apl/tests/parse.sx`
|
- [ ] Unit tests in `lib/apl/tests/parse.sx`
|
||||||
|
|
||||||
@@ -108,7 +108,7 @@ Core mapping:
|
|||||||
|
|
||||||
_Newest first._
|
_Newest first._
|
||||||
|
|
||||||
- _(none yet)_
|
- 2026-04-25: tokenizer (Phase 1 step 1) — 46/46 tests green; Unicode-aware starts-with? scanner for multi-byte APL glyphs; `lib/apl/tokenizer.sx` + `lib/apl/tests/parse.sx`
|
||||||
|
|
||||||
## Blockers
|
## Blockers
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user