Files
rose-ash/lib/apl/parser.sx
giles aaabe370d6
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 1m29s
apl: bracket indexing A[I] → (I⌷A) (+7 tests, 415/415)
Parser: maybe-bracket helper wraps any value followed by [expr]
into (:dyad (:fn-glyph ⌷) idx val).  Wired into :name and :lparen
branches of collect-segments-loop.

apl-run "(10 20 30)[2]" → 20
apl-run "A ← 100 200 300 ⋄ A[2]" → 200
apl-run "(⍳5)[3] × 7" → 21

Multi-axis A[I;J] deferred — needs semicolon-split parsing.
2026-05-07 14:07:05 +00:00

529 lines
17 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
; APL Parser — right-to-left expression parser
;
; Takes a token list (output of apl-tokenize) and produces an AST.
; APL evaluates right-to-left with no precedence among functions.
; Operators bind to the function immediately to their left in the source.
;
; AST node types:
; (:num n) number literal
; (:str s) string literal
; (:vec n1 n2 ...) strand (juxtaposed literals)
; (:name "x") name reference / alpha / omega
; (:assign "x" expr) assignment x←expr
; (:monad fn arg) monadic function call
; (:dyad fn left right) dyadic function call
; (:derived-fn op fn) derived function: f/ f¨ f⍨
; (:derived-fn2 "." f g) inner product: f.g
; (:outer "∘." fn) outer product: ∘.f
; (:fn-glyph "") function reference
; (:fn-name "foo") named-function reference (dfn variable)
; (:dfn stmt...) {+⍵} anonymous function
; (:guard cond expr) cond:expr guard inside dfn
; (:program stmt...) multi-statement sequence
; ============================================================
; Glyph classification sets
; ============================================================
(define apl-parse-op-glyphs
(list "/" "\\" "¨" "⍨" "∘" "." "⍣" "⍤" "⍥" "@"))
(define apl-parse-fn-glyphs
(list "+" "-" "×" "÷" "*" "⍟" "⌈" "⌊" "|" "!" "?" "○" "~"
"<" "≤" "=" "≥" ">" "≠" "∊" "∧" "" "⍱" "⍲"
"," "⍪" "" "⌽" "⊖" "⍉" "↑" "↓" "⊂" "⊃" "⊆"
"" "∩" "" "⍸" "⌷" "⍋" "⍒" "⊥" "" "⊣" "⊢" "⍎" "⍕"))
(define apl-quad-fn-names (list "⎕FMT"))
(define
apl-parse-op-glyph?
(fn (v) (some (fn (g) (= g v)) apl-parse-op-glyphs)))
; ============================================================
; Token accessors
; ============================================================
(define
apl-parse-fn-glyph?
(fn (v) (some (fn (g) (= g v)) apl-parse-fn-glyphs)))
(define tok-type (fn (tok) (get tok :type)))
(define tok-val (fn (tok) (get tok :value)))
(define
is-op-tok?
(fn
(tok)
(and (= (tok-type tok) :glyph) (apl-parse-op-glyph? (tok-val tok)))))
; ============================================================
; Collect trailing operators starting at index i
; Returns {:ops (op ...) :end new-i}
; ============================================================
(define
is-fn-tok?
(fn
(tok)
(or
(and (= (tok-type tok) :glyph) (apl-parse-fn-glyph? (tok-val tok)))
(and
(= (tok-type tok) :name)
(some (fn (q) (= q (tok-val tok))) apl-quad-fn-names)))))
(define collect-ops (fn (tokens i) (collect-ops-loop tokens i (list))))
; ============================================================
; Build a derived-fn node by chaining operators left-to-right
; (+/¨ → (:derived-fn "¨" (:derived-fn "/" (:fn-glyph "+"))))
; ============================================================
(define
collect-ops-loop
(fn
(tokens i acc)
(if
(>= i (len tokens))
{:end i :ops acc}
(let
((tok (nth tokens i)))
(if
(is-op-tok? tok)
(collect-ops-loop tokens (+ i 1) (append acc (tok-val tok)))
{:end i :ops acc})))))
; ============================================================
; Find matching close bracket/paren/brace
; Returns the index of the matching close token
; ============================================================
(define
build-derived-fn
(fn
(fn-node ops)
(if
(= (len ops) 0)
fn-node
(build-derived-fn (list :derived-fn (first ops) fn-node) (rest ops)))))
(define
find-matching-close
(fn
(tokens start open-type close-type)
(find-matching-close-loop tokens start open-type close-type 1)))
; ============================================================
; Segment collection: scan tokens left-to-right, building
; a list of {:kind "val"/"fn" :node ast} segments.
; Operators following function glyphs are merged into
; derived-fn nodes during this pass.
; ============================================================
(define
find-matching-close-loop
(fn
(tokens i open-type close-type depth)
(if
(>= i (len tokens))
(len tokens)
(let
((tt (tok-type (nth tokens i))))
(cond
((= tt open-type)
(find-matching-close-loop
tokens
(+ i 1)
open-type
close-type
(+ depth 1)))
((= tt close-type)
(if
(= depth 1)
i
(find-matching-close-loop
tokens
(+ i 1)
open-type
close-type
(- depth 1))))
(true
(find-matching-close-loop
tokens
(+ i 1)
open-type
close-type
depth)))))))
(define
collect-segments
(fn (tokens) (collect-segments-loop tokens 0 (list))))
; ============================================================
; Build tree from segment list
;
; The segments are in left-to-right order.
; APL evaluates right-to-left, so the LEFTMOST function is
; the outermost (last-evaluated) node.
;
; Patterns:
; [val] → val node
; [fn val ...] → (:monad fn (build-tree rest))
; [val fn val ...] → (:dyad fn val (build-tree rest))
; [val val ...] → (:vec val1 val2 ...) — strand
; ============================================================
; Find the index of the first function segment (returns -1 if none)
(define
collect-segments-loop
(fn
(tokens i acc)
(if
(>= i (len tokens))
acc
(let
((tok (nth tokens i)) (n (len tokens)))
(let
((tt (tok-type tok)) (tv (tok-val tok)))
(cond
((or (= tt :diamond) (= tt :newline) (= tt :semi))
(collect-segments-loop tokens (+ i 1) acc))
((= tt :num)
(collect-segments-loop tokens (+ i 1) (append acc {:kind "val" :node (list :num tv)})))
((= tt :str)
(collect-segments-loop tokens (+ i 1) (append acc {:kind "val" :node (list :str tv)})))
((= tt :name)
(if
(some (fn (q) (= q tv)) apl-quad-fn-names)
(let
((op-result (collect-ops tokens (+ i 1))))
(let
((ops (get op-result :ops)) (ni (get op-result :end)))
(let
((fn-node (build-derived-fn (list :fn-glyph tv) ops)))
(collect-segments-loop
tokens
ni
(append acc {:kind "fn" :node fn-node})))))
(let
((br (maybe-bracket (list :name tv) tokens (+ i 1))))
(collect-segments-loop
tokens
(nth br 1)
(append acc {:kind "val" :node (nth br 0)})))))
((= tt :lparen)
(let
((end (find-matching-close tokens (+ i 1) :lparen :rparen)))
(let
((inner-tokens (slice tokens (+ i 1) end))
(after (+ end 1)))
(let
((br (maybe-bracket (parse-apl-expr inner-tokens) tokens after)))
(collect-segments-loop
tokens
(nth br 1)
(append acc {:kind "val" :node (nth br 0)}))))))
((= tt :lbrace)
(let
((end (find-matching-close tokens (+ i 1) :lbrace :rbrace)))
(let
((inner-tokens (slice tokens (+ i 1) end))
(after (+ end 1)))
(collect-segments-loop tokens after (append acc {:kind "fn" :node (parse-dfn inner-tokens)})))))
((= tt :glyph)
(cond
((or (= tv "") (= tv "⍵"))
(collect-segments-loop
tokens
(+ i 1)
(append acc {:kind "val" :node (list :name tv)})))
((= tv "∇")
(collect-segments-loop
tokens
(+ i 1)
(append acc {:kind "fn" :node (list :fn-glyph "∇")})))
((and (= tv "∘") (< (+ i 1) n) (= (tok-val (nth tokens (+ i 1))) "."))
(if
(and (< (+ i 2) n) (is-fn-tok? (nth tokens (+ i 2))))
(let
((fn-tv (tok-val (nth tokens (+ i 2)))))
(let
((op-result (collect-ops tokens (+ i 3))))
(let
((ops (get op-result :ops))
(ni (get op-result :end)))
(let
((fn-node (build-derived-fn (list :fn-glyph fn-tv) ops)))
(collect-segments-loop
tokens
ni
(append acc {:kind "fn" :node (list :outer "∘." fn-node)}))))))
(collect-segments-loop tokens (+ i 1) acc)))
((apl-parse-fn-glyph? tv)
(let
((op-result (collect-ops tokens (+ i 1))))
(let
((ops (get op-result :ops))
(ni (get op-result :end)))
(if
(and
(= (len ops) 1)
(= (first ops) ".")
(< ni n)
(is-fn-tok? (nth tokens ni)))
(let
((g-tv (tok-val (nth tokens ni))))
(let
((op-result2 (collect-ops tokens (+ ni 1))))
(let
((ops2 (get op-result2 :ops))
(ni2 (get op-result2 :end)))
(let
((g-node (build-derived-fn (list :fn-glyph g-tv) ops2)))
(collect-segments-loop
tokens
ni2
(append acc {:kind "fn" :node (list :derived-fn2 "." (list :fn-glyph tv) g-node)}))))))
(let
((fn-node (build-derived-fn (list :fn-glyph tv) ops)))
(collect-segments-loop
tokens
ni
(append acc {:kind "fn" :node fn-node})))))))
((apl-parse-op-glyph? tv)
(collect-segments-loop tokens (+ i 1) acc))
(true (collect-segments-loop tokens (+ i 1) acc))))
(true (collect-segments-loop tokens (+ i 1) acc))))))))
(define find-first-fn (fn (segs) (find-first-fn-loop segs 0)))
; Build an array node from 0..n value segments
; If n=1 → return that segment's node
; If n>1 → return (:vec node1 node2 ...)
(define
find-first-fn-loop
(fn
(segs i)
(if
(>= i (len segs))
-1
(if
(= (get (nth segs i) :kind) "fn")
i
(find-first-fn-loop segs (+ i 1))))))
(define
segs-to-array
(fn
(segs)
(if
(= (len segs) 1)
(get (first segs) :node)
(cons :vec (map (fn (s) (get s :node)) segs)))))
; ============================================================
; Split token list on statement separators (diamond / newline)
; Only splits at depth 0 (ignores separators inside { } or ( ) )
; ============================================================
(define
build-tree
(fn
(segs)
(cond
((= (len segs) 0) nil)
((= (len segs) 1) (get (first segs) :node))
((every? (fn (s) (= (get s :kind) "val")) segs)
(segs-to-array segs))
(true
(let
((fn-idx (find-first-fn segs)))
(cond
((= fn-idx -1) (segs-to-array segs))
((= fn-idx 0)
(list
:monad (get (first segs) :node)
(build-tree (rest segs))))
(true
(let
((left-segs (slice segs 0 fn-idx))
(fn-seg (nth segs fn-idx))
(right-segs (slice segs (+ fn-idx 1))))
(list
:dyad (get fn-seg :node)
(segs-to-array left-segs)
(build-tree right-segs))))))))))
(define
split-statements
(fn (tokens) (split-statements-loop tokens (list) (list) 0)))
; ============================================================
; Parse a dfn body (tokens between { and })
; Handles guard expressions: cond : expr
; ============================================================
(define
split-statements-loop
(fn
(tokens current-stmt acc depth)
(if
(= (len tokens) 0)
(if (> (len current-stmt) 0) (append acc (list current-stmt)) acc)
(let
((tok (first tokens))
(rest-toks (rest tokens))
(tt (tok-type (first tokens))))
(cond
((or (= tt :lparen) (= tt :lbrace) (= tt :lbracket))
(split-statements-loop
rest-toks
(append current-stmt tok)
acc
(+ depth 1)))
((or (= tt :rparen) (= tt :rbrace) (= tt :rbracket))
(split-statements-loop
rest-toks
(append current-stmt tok)
acc
(- depth 1)))
((and (> depth 0) (or (= tt :diamond) (= tt :newline)))
(split-statements-loop
rest-toks
(append current-stmt tok)
acc
depth))
((and (= depth 0) (or (= tt :diamond) (= tt :newline)))
(if
(> (len current-stmt) 0)
(split-statements-loop
rest-toks
(list)
(append acc (list current-stmt))
depth)
(split-statements-loop rest-toks (list) acc depth)))
(true
(split-statements-loop
rest-toks
(append current-stmt tok)
acc
depth)))))))
(define
parse-dfn
(fn
(tokens)
(let
((stmt-groups (split-statements tokens)))
(let ((stmts (map parse-dfn-stmt stmt-groups))) (cons :dfn stmts)))))
(define
parse-dfn-stmt
(fn
(tokens)
(let
((colon-idx (find-top-level-colon tokens 0)))
(if
(>= colon-idx 0)
(let
((cond-tokens (slice tokens 0 colon-idx))
(body-tokens (slice tokens (+ colon-idx 1))))
(list
:guard (parse-apl-expr cond-tokens)
(parse-apl-expr body-tokens)))
(parse-stmt tokens)))))
(define
find-top-level-colon
(fn (tokens i) (find-top-level-colon-loop tokens i 0)))
; ============================================================
; Parse a single statement (assignment or expression)
; ============================================================
(define
find-top-level-colon-loop
(fn
(tokens i depth)
(if
(>= i (len tokens))
-1
(let
((tok (nth tokens i)) (tt (tok-type (nth tokens i))))
(cond
((or (= tt :lparen) (= tt :lbrace) (= tt :lbracket))
(find-top-level-colon-loop tokens (+ i 1) (+ depth 1)))
((or (= tt :rparen) (= tt :rbrace) (= tt :rbracket))
(find-top-level-colon-loop tokens (+ i 1) (- depth 1)))
((and (= tt :colon) (= depth 0)) i)
(true (find-top-level-colon-loop tokens (+ i 1) depth)))))))
; ============================================================
; Parse an expression from a flat token list
; ============================================================
(define
parse-stmt
(fn
(tokens)
(if
(and
(>= (len tokens) 2)
(= (tok-type (nth tokens 0)) :name)
(= (tok-type (nth tokens 1)) :assign))
(list
:assign (tok-val (nth tokens 0))
(parse-apl-expr (slice tokens 2)))
(parse-apl-expr tokens))))
; ============================================================
; Main entry point
; parse-apl: string → AST
; ============================================================
(define
parse-apl-expr
(fn
(tokens)
(let
((segs (collect-segments tokens)))
(if (= (len segs) 0) nil (build-tree segs)))))
(define
parse-apl
(fn
(src)
(let
((tokens (apl-tokenize src)))
(let
((stmt-groups (split-statements tokens)))
(if
(= (len stmt-groups) 0)
nil
(if
(= (len stmt-groups) 1)
(parse-stmt (first stmt-groups))
(cons :program (map parse-stmt stmt-groups))))))))
(define
maybe-bracket
(fn
(val-node tokens after)
(if
(and
(< after (len tokens))
(= (tok-type (nth tokens after)) :lbracket))
(let
((end (find-matching-close tokens (+ after 1) :lbracket :rbracket)))
(let
((inner-tokens (slice tokens (+ after 1) end))
(next-after (+ end 1)))
(let
((idx-expr (parse-apl-expr inner-tokens)))
(let
((indexed (list :dyad (list :fn-glyph "⌷") idx-expr val-node)))
(maybe-bracket indexed tokens next-after)))))
(list val-node after))))