Files
rose-ash/lib/apl/parser.sx
giles f5d3b1df19
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 50s
apl: ⍵-rebind + primes.apl runs as-written (+4 tests)
Two changes wire the original primes idiom through:

1. Parser :glyph branch detects ⍵← / ⍺← and emits :assign-expr
   (was only :name-token before).
2. Eval-ast :name lookup checks env["⍵"]/env["⍺"] before falling
   back to env["omega"]/env["alpha"].  Inline ⍵-rebind binds
   under the glyph key directly.

apl-run "primes ← {(2=+⌿0=⍵∘.|⍵)/⍵←⍳⍵} ⋄ primes 50"
→ 2 3 5 7 11 13 17 19 23 29 31 37 41 43 47

primes.apl now runs as-written via apl-run-file + " ⋄ primes 30".
2026-05-07 23:19:45 +00:00

702 lines
22 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
; APL Parser — right-to-left expression parser
;
; Takes a token list (output of apl-tokenize) and produces an AST.
; APL evaluates right-to-left with no precedence among functions.
; Operators bind to the function immediately to their left in the source.
;
; AST node types:
; (:num n) number literal
; (:str s) string literal
; (:vec n1 n2 ...) strand (juxtaposed literals)
; (:name "x") name reference / alpha / omega
; (:assign "x" expr) assignment x←expr
; (:monad fn arg) monadic function call
; (:dyad fn left right) dyadic function call
; (:derived-fn op fn) derived function: f/ f¨ f⍨
; (:derived-fn2 "." f g) inner product: f.g
; (:outer "∘." fn) outer product: ∘.f
; (:fn-glyph "") function reference
; (:fn-name "foo") named-function reference (dfn variable)
; (:dfn stmt...) {+⍵} anonymous function
; (:guard cond expr) cond:expr guard inside dfn
; (:program stmt...) multi-statement sequence
; ============================================================
; Glyph classification sets
; ============================================================
(define
apl-parse-op-glyphs
(list "/" "⌿" "\\" "⍀" "¨" "⍨" "∘" "." "⍣" "⍤" "⍥" "@"))
(define
apl-parse-fn-glyphs
(list
"+"
"-"
"×"
"÷"
"*"
"⍟"
"⌈"
"⌊"
"|"
"!"
"?"
"○"
"~"
"<"
"≤"
"="
"≥"
">"
"≠"
"≢"
"≡"
"∊"
"∧"
""
"⍱"
"⍲"
","
"⍪"
""
"⌽"
"⊖"
"⍉"
"↑"
"↓"
"⊂"
"⊃"
"⊆"
""
"∩"
""
"⍸"
"⌷"
"⍋"
"⍒"
"⊥"
""
"⊣"
"⊢"
"⍎"
"⍕"))
(define apl-quad-fn-names (list "⎕FMT" "⎕←"))
(define apl-known-fn-names (list))
; ============================================================
; Token accessors
; ============================================================
(define
apl-collect-fn-bindings
(fn
(stmt-groups)
(set! apl-known-fn-names (list))
(for-each
(fn
(toks)
(when
(and
(>= (len toks) 3)
(= (tok-type (nth toks 0)) :name)
(= (tok-type (nth toks 1)) :assign)
(= (tok-type (nth toks 2)) :lbrace))
(set!
apl-known-fn-names
(cons (tok-val (nth toks 0)) apl-known-fn-names))))
stmt-groups)))
(define
apl-parse-op-glyph?
(fn (v) (some (fn (g) (= g v)) apl-parse-op-glyphs)))
(define
apl-parse-fn-glyph?
(fn (v) (some (fn (g) (= g v)) apl-parse-fn-glyphs)))
(define tok-type (fn (tok) (get tok :type)))
; ============================================================
; Collect trailing operators starting at index i
; Returns {:ops (op ...) :end new-i}
; ============================================================
(define tok-val (fn (tok) (get tok :value)))
(define
is-op-tok?
(fn
(tok)
(and (= (tok-type tok) :glyph) (apl-parse-op-glyph? (tok-val tok)))))
; ============================================================
; Build a derived-fn node by chaining operators left-to-right
; (+/¨ → (:derived-fn "¨" (:derived-fn "/" (:fn-glyph "+"))))
; ============================================================
(define
is-fn-tok?
(fn
(tok)
(or
(and (= (tok-type tok) :glyph) (apl-parse-fn-glyph? (tok-val tok)))
(and
(= (tok-type tok) :name)
(or
(some (fn (q) (= q (tok-val tok))) apl-quad-fn-names)
(some (fn (q) (= q (tok-val tok))) apl-known-fn-names))))))
; ============================================================
; Find matching close bracket/paren/brace
; Returns the index of the matching close token
; ============================================================
(define collect-ops (fn (tokens i) (collect-ops-loop tokens i (list))))
(define
collect-ops-loop
(fn
(tokens i acc)
(if
(>= i (len tokens))
{:end i :ops acc}
(let
((tok (nth tokens i)))
(if
(is-op-tok? tok)
(collect-ops-loop tokens (+ i 1) (append acc (tok-val tok)))
{:end i :ops acc})))))
; ============================================================
; Segment collection: scan tokens left-to-right, building
; a list of {:kind "val"/"fn" :node ast} segments.
; Operators following function glyphs are merged into
; derived-fn nodes during this pass.
; ============================================================
(define
build-derived-fn
(fn
(fn-node ops)
(if
(= (len ops) 0)
fn-node
(build-derived-fn (list :derived-fn (first ops) fn-node) (rest ops)))))
(define
find-matching-close
(fn
(tokens start open-type close-type)
(find-matching-close-loop tokens start open-type close-type 1)))
; ============================================================
; Build tree from segment list
;
; The segments are in left-to-right order.
; APL evaluates right-to-left, so the LEFTMOST function is
; the outermost (last-evaluated) node.
;
; Patterns:
; [val] → val node
; [fn val ...] → (:monad fn (build-tree rest))
; [val fn val ...] → (:dyad fn val (build-tree rest))
; [val val ...] → (:vec val1 val2 ...) — strand
; ============================================================
; Find the index of the first function segment (returns -1 if none)
(define
find-matching-close-loop
(fn
(tokens i open-type close-type depth)
(if
(>= i (len tokens))
(len tokens)
(let
((tt (tok-type (nth tokens i))))
(cond
((= tt open-type)
(find-matching-close-loop
tokens
(+ i 1)
open-type
close-type
(+ depth 1)))
((= tt close-type)
(if
(= depth 1)
i
(find-matching-close-loop
tokens
(+ i 1)
open-type
close-type
(- depth 1))))
(true
(find-matching-close-loop
tokens
(+ i 1)
open-type
close-type
depth)))))))
(define
collect-segments
(fn (tokens) (collect-segments-loop tokens 0 (list))))
; Build an array node from 0..n value segments
; If n=1 → return that segment's node
; If n>1 → return (:vec node1 node2 ...)
(define
collect-segments-loop
(fn
(tokens i acc)
(if
(>= i (len tokens))
acc
(let
((tok (nth tokens i)) (n (len tokens)))
(let
((tt (tok-type tok)) (tv (tok-val tok)))
(cond
((or (= tt :diamond) (= tt :newline) (= tt :semi))
(collect-segments-loop tokens (+ i 1) acc))
((= tt :num)
(collect-segments-loop tokens (+ i 1) (append acc {:kind "val" :node (list :num tv)})))
((= tt :str)
(collect-segments-loop tokens (+ i 1) (append acc {:kind "val" :node (list :str tv)})))
((= tt :name)
(cond
((and (< (+ i 1) (len tokens)) (= (tok-type (nth tokens (+ i 1))) :assign))
(let
((rhs-tokens (slice tokens (+ i 2) (len tokens))))
(let
((rhs-expr (parse-apl-expr rhs-tokens)))
(collect-segments-loop
tokens
(len tokens)
(append acc {:kind "val" :node (list :assign-expr tv rhs-expr)})))))
((some (fn (q) (= q tv)) apl-quad-fn-names)
(let
((op-result (collect-ops tokens (+ i 1))))
(let
((ops (get op-result :ops))
(ni (get op-result :end)))
(let
((fn-node (build-derived-fn (list :fn-glyph tv) ops)))
(collect-segments-loop
tokens
ni
(append acc {:kind "fn" :node fn-node}))))))
((some (fn (q) (= q tv)) apl-known-fn-names)
(let
((op-result (collect-ops tokens (+ i 1))))
(let
((ops (get op-result :ops))
(ni (get op-result :end)))
(let
((fn-node (build-derived-fn (list :fn-name tv) ops)))
(collect-segments-loop
tokens
ni
(append acc {:kind "fn" :node fn-node}))))))
(else
(let
((br (maybe-bracket (list :name tv) tokens (+ i 1))))
(collect-segments-loop
tokens
(nth br 1)
(append acc {:kind "val" :node (nth br 0)}))))))
((= tt :lparen)
(let
((end (find-matching-close tokens (+ i 1) :lparen :rparen)))
(let
((inner-tokens (slice tokens (+ i 1) end))
(after (+ end 1)))
(let
((inner-segs (collect-segments inner-tokens)))
(if
(and
(>= (len inner-segs) 2)
(every? (fn (s) (= (get s :kind) "fn")) inner-segs))
(let
((train-node (cons :train (map (fn (s) (get s :node)) inner-segs))))
(collect-segments-loop
tokens
after
(append acc {:kind "fn" :node train-node})))
(let
((br (maybe-bracket (parse-apl-expr inner-tokens) tokens after)))
(collect-segments-loop
tokens
(nth br 1)
(append acc {:kind "val" :node (nth br 0)}))))))))
((= tt :lbrace)
(let
((end (find-matching-close tokens (+ i 1) :lbrace :rbrace)))
(let
((inner-tokens (slice tokens (+ i 1) end))
(after (+ end 1)))
(collect-segments-loop tokens after (append acc {:kind "fn" :node (parse-dfn inner-tokens)})))))
((= tt :glyph)
(cond
((or (= tv "") (= tv "⍵"))
(if
(and
(< (+ i 1) (len tokens))
(= (tok-type (nth tokens (+ i 1))) :assign))
(let
((rhs-tokens (slice tokens (+ i 2) (len tokens))))
(let
((rhs-expr (parse-apl-expr rhs-tokens)))
(collect-segments-loop
tokens
(len tokens)
(append acc {:kind "val" :node (list :assign-expr tv rhs-expr)}))))
(collect-segments-loop
tokens
(+ i 1)
(append acc {:kind "val" :node (list :name tv)}))))
((= tv "∇")
(collect-segments-loop
tokens
(+ i 1)
(append acc {:kind "fn" :node (list :fn-glyph "∇")})))
((and (= tv "∘") (< (+ i 1) n) (= (tok-val (nth tokens (+ i 1))) "."))
(if
(and (< (+ i 2) n) (is-fn-tok? (nth tokens (+ i 2))))
(let
((fn-tv (tok-val (nth tokens (+ i 2)))))
(let
((op-result (collect-ops tokens (+ i 3))))
(let
((ops (get op-result :ops))
(ni (get op-result :end)))
(let
((fn-node (build-derived-fn (list :fn-glyph fn-tv) ops)))
(collect-segments-loop
tokens
ni
(append acc {:kind "fn" :node (list :outer "∘." fn-node)}))))))
(collect-segments-loop tokens (+ i 1) acc)))
((apl-parse-fn-glyph? tv)
(let
((op-result (collect-ops tokens (+ i 1))))
(let
((ops (get op-result :ops))
(ni (get op-result :end)))
(if
(and
(= (len ops) 1)
(= (first ops) ".")
(< ni n)
(is-fn-tok? (nth tokens ni)))
(let
((g-tv (tok-val (nth tokens ni))))
(let
((op-result2 (collect-ops tokens (+ ni 1))))
(let
((ops2 (get op-result2 :ops))
(ni2 (get op-result2 :end)))
(let
((g-node (build-derived-fn (list :fn-glyph g-tv) ops2)))
(collect-segments-loop
tokens
ni2
(append acc {:kind "fn" :node (list :derived-fn2 "." (list :fn-glyph tv) g-node)}))))))
(let
((fn-node (build-derived-fn (list :fn-glyph tv) ops)))
(collect-segments-loop
tokens
ni
(append acc {:kind "fn" :node fn-node})))))))
((apl-parse-op-glyph? tv)
(if
(or (= tv "/") (= tv "⌿") (= tv "\\") (= tv "⍀"))
(collect-segments-loop
tokens
(+ i 1)
(append acc {:kind "fn" :node (list :fn-glyph tv)}))
(collect-segments-loop tokens (+ i 1) acc)))
(true (collect-segments-loop tokens (+ i 1) acc))))
(true (collect-segments-loop tokens (+ i 1) acc))))))))
(define find-first-fn (fn (segs) (find-first-fn-loop segs 0)))
; ============================================================
; Split token list on statement separators (diamond / newline)
; Only splits at depth 0 (ignores separators inside { } or ( ) )
; ============================================================
(define
find-first-fn-loop
(fn
(segs i)
(if
(>= i (len segs))
-1
(if
(= (get (nth segs i) :kind) "fn")
i
(find-first-fn-loop segs (+ i 1))))))
(define
segs-to-array
(fn
(segs)
(if
(= (len segs) 1)
(get (first segs) :node)
(cons :vec (map (fn (s) (get s :node)) segs)))))
; ============================================================
; Parse a dfn body (tokens between { and })
; Handles guard expressions: cond : expr
; ============================================================
(define
build-tree
(fn
(segs)
(cond
((= (len segs) 0) nil)
((= (len segs) 1) (get (first segs) :node))
((every? (fn (s) (= (get s :kind) "val")) segs)
(segs-to-array segs))
(true
(let
((fn-idx (find-first-fn segs)))
(cond
((= fn-idx -1) (segs-to-array segs))
((= fn-idx 0)
(list
:monad (get (first segs) :node)
(build-tree (rest segs))))
(true
(let
((left-segs (slice segs 0 fn-idx))
(fn-seg (nth segs fn-idx))
(right-segs (slice segs (+ fn-idx 1))))
(list
:dyad (get fn-seg :node)
(segs-to-array left-segs)
(build-tree right-segs))))))))))
(define
split-statements
(fn (tokens) (split-statements-loop tokens (list) (list) 0)))
(define
split-statements-loop
(fn
(tokens current-stmt acc depth)
(if
(= (len tokens) 0)
(if (> (len current-stmt) 0) (append acc (list current-stmt)) acc)
(let
((tok (first tokens))
(rest-toks (rest tokens))
(tt (tok-type (first tokens))))
(cond
((or (= tt :lparen) (= tt :lbrace) (= tt :lbracket))
(split-statements-loop
rest-toks
(append current-stmt tok)
acc
(+ depth 1)))
((or (= tt :rparen) (= tt :rbrace) (= tt :rbracket))
(split-statements-loop
rest-toks
(append current-stmt tok)
acc
(- depth 1)))
((and (> depth 0) (or (= tt :diamond) (= tt :newline)))
(split-statements-loop
rest-toks
(append current-stmt tok)
acc
depth))
((and (= depth 0) (or (= tt :diamond) (= tt :newline)))
(if
(> (len current-stmt) 0)
(split-statements-loop
rest-toks
(list)
(append acc (list current-stmt))
depth)
(split-statements-loop rest-toks (list) acc depth)))
(true
(split-statements-loop
rest-toks
(append current-stmt tok)
acc
depth)))))))
(define
parse-dfn
(fn
(tokens)
(let
((stmt-groups (split-statements tokens)))
(let ((stmts (map parse-dfn-stmt stmt-groups))) (cons :dfn stmts)))))
; ============================================================
; Parse a single statement (assignment or expression)
; ============================================================
(define
parse-dfn-stmt
(fn
(tokens)
(let
((colon-idx (find-top-level-colon tokens 0)))
(if
(>= colon-idx 0)
(let
((cond-tokens (slice tokens 0 colon-idx))
(body-tokens (slice tokens (+ colon-idx 1))))
(list
:guard (parse-apl-expr cond-tokens)
(parse-apl-expr body-tokens)))
(parse-stmt tokens)))))
; ============================================================
; Parse an expression from a flat token list
; ============================================================
(define
find-top-level-colon
(fn (tokens i) (find-top-level-colon-loop tokens i 0)))
; ============================================================
; Main entry point
; parse-apl: string → AST
; ============================================================
(define
find-top-level-colon-loop
(fn
(tokens i depth)
(if
(>= i (len tokens))
-1
(let
((tok (nth tokens i)) (tt (tok-type (nth tokens i))))
(cond
((or (= tt :lparen) (= tt :lbrace) (= tt :lbracket))
(find-top-level-colon-loop tokens (+ i 1) (+ depth 1)))
((or (= tt :rparen) (= tt :rbrace) (= tt :rbracket))
(find-top-level-colon-loop tokens (+ i 1) (- depth 1)))
((and (= tt :colon) (= depth 0)) i)
(true (find-top-level-colon-loop tokens (+ i 1) depth)))))))
(define
parse-stmt
(fn
(tokens)
(if
(and
(>= (len tokens) 2)
(= (tok-type (nth tokens 0)) :name)
(= (tok-type (nth tokens 1)) :assign))
(list
:assign (tok-val (nth tokens 0))
(parse-apl-expr (slice tokens 2)))
(parse-apl-expr tokens))))
(define
parse-apl-expr
(fn
(tokens)
(let
((segs (collect-segments tokens)))
(if (= (len segs) 0) nil (build-tree segs)))))
(define
parse-apl
(fn
(src)
(let
((tokens (apl-tokenize src)))
(let
((stmt-groups (split-statements tokens)))
(begin
(apl-collect-fn-bindings stmt-groups)
(if
(= (len stmt-groups) 0)
nil
(if
(= (len stmt-groups) 1)
(parse-stmt (first stmt-groups))
(cons :program (map parse-stmt stmt-groups)))))))))
(define
split-bracket-loop
(fn
(tokens current acc depth)
(if
(= (len tokens) 0)
(append acc (list current))
(let
((tok (first tokens)) (more (rest tokens)))
(let
((tt (tok-type tok)))
(cond
((or (= tt :lparen) (= tt :lbrace) (= tt :lbracket))
(split-bracket-loop
more
(append current (list tok))
acc
(+ depth 1)))
((or (= tt :rparen) (= tt :rbrace) (= tt :rbracket))
(split-bracket-loop
more
(append current (list tok))
acc
(- depth 1)))
((and (= tt :semi) (= depth 0))
(split-bracket-loop
more
(list)
(append acc (list current))
depth))
(else
(split-bracket-loop more (append current (list tok)) acc depth))))))))
(define
split-bracket-content
(fn (tokens) (split-bracket-loop tokens (list) (list) 0)))
(define
maybe-bracket
(fn
(val-node tokens after)
(if
(and
(< after (len tokens))
(= (tok-type (nth tokens after)) :lbracket))
(let
((end (find-matching-close tokens (+ after 1) :lbracket :rbracket)))
(let
((inner-tokens (slice tokens (+ after 1) end))
(next-after (+ end 1)))
(let
((sections (split-bracket-content inner-tokens)))
(if
(= (len sections) 1)
(let
((idx-expr (parse-apl-expr inner-tokens)))
(let
((indexed (list :dyad (list :fn-glyph "⌷") idx-expr val-node)))
(maybe-bracket indexed tokens next-after)))
(let
((axis-exprs (map (fn (toks) (if (= (len toks) 0) :all (parse-apl-expr toks))) sections)))
(let
((indexed (cons :bracket (cons val-node axis-exprs))))
(maybe-bracket indexed tokens next-after)))))))
(list val-node after))))