Files
rose-ash/lib/apl/parser.sx
giles da8ba104a6
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 17s
apl: right-to-left parser + 44 tests (Phase 1, step 2)
Implement lib/apl/parser.sx — APL expression parser:
- Segment-based algorithm: scan L→R collecting {fn,val} segments
- build-tree constructs AST with leftmost-fn = root (right-to-left semantics)
- Handles: monadic/dyadic fns, strands (:vec), assignment (:assign)
- Operators: derived-fn (:derived-fn op fn), inner product (:derived-fn2)
- Outer product ∘.f (:outer), dfns {:dfn stmt...}, guards (:guard cond expr)
- split-statements is bracket-aware (depth tracking prevents splitting inside {})

44 new parser tests + 46 existing tokenizer = 90/90 green.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-26 14:05:43 +00:00

437 lines
16 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
; APL Parser — right-to-left expression parser
;
; Takes a token list (output of apl-tokenize) and produces an AST.
; APL evaluates right-to-left with no precedence among functions.
; Operators bind to the function immediately to their left in the source.
;
; AST node types:
; (:num n) number literal
; (:str s) string literal
; (:vec n1 n2 ...) strand (juxtaposed literals)
; (:name "x") name reference / alpha / omega
; (:assign "x" expr) assignment x←expr
; (:monad fn arg) monadic function call
; (:dyad fn left right) dyadic function call
; (:derived-fn op fn) derived function: f/ f¨ f⍨
; (:derived-fn2 "." f g) inner product: f.g
; (:outer "∘." fn) outer product: ∘.f
; (:fn-glyph "") function reference
; (:fn-name "foo") named-function reference (dfn variable)
; (:dfn stmt...) {+⍵} anonymous function
; (:guard cond expr) cond:expr guard inside dfn
; (:program stmt...) multi-statement sequence
; ============================================================
; Glyph classification sets
; ============================================================
(define apl-parse-op-glyphs
(list "/" "\\" "¨" "⍨" "∘" "." "⍣" "⍤" "⍥" "@"))
(define apl-parse-fn-glyphs
(list "+" "-" "×" "÷" "*" "⍟" "⌈" "⌊" "|" "!" "?" "○" "~"
"<" "≤" "=" "≥" ">" "≠" "∊" "∧" "" "⍱" "⍲"
"," "⍪" "" "⌽" "⊖" "⍉" "↑" "↓" "⊂" "⊃" "⊆"
"" "∩" "" "⍸" "⌷" "⍋" "⍒" "⊥" "" "⊣" "⊢" "⍎" "⍕"))
(define apl-parse-op-glyph?
(fn (v)
(some (fn (g) (= g v)) apl-parse-op-glyphs)))
(define apl-parse-fn-glyph?
(fn (v)
(some (fn (g) (= g v)) apl-parse-fn-glyphs)))
; ============================================================
; Token accessors
; ============================================================
(define tok-type
(fn (tok)
(get tok :type)))
(define tok-val
(fn (tok)
(get tok :value)))
(define is-op-tok?
(fn (tok)
(and (= (tok-type tok) :glyph)
(apl-parse-op-glyph? (tok-val tok)))))
(define is-fn-tok?
(fn (tok)
(and (= (tok-type tok) :glyph)
(apl-parse-fn-glyph? (tok-val tok)))))
; ============================================================
; Collect trailing operators starting at index i
; Returns {:ops (op ...) :end new-i}
; ============================================================
(define collect-ops
(fn (tokens i)
(collect-ops-loop tokens i (list))))
(define collect-ops-loop
(fn (tokens i acc)
(if (>= i (len tokens))
{:ops acc :end i}
(let ((tok (nth tokens i)))
(if (is-op-tok? tok)
(collect-ops-loop tokens (+ i 1) (append acc (tok-val tok)))
{:ops acc :end i})))))
; ============================================================
; Build a derived-fn node by chaining operators left-to-right
; (+/¨ → (:derived-fn "¨" (:derived-fn "/" (:fn-glyph "+"))))
; ============================================================
(define build-derived-fn
(fn (fn-node ops)
(if (= (len ops) 0)
fn-node
(build-derived-fn
(list :derived-fn (first ops) fn-node)
(rest ops)))))
; ============================================================
; Find matching close bracket/paren/brace
; Returns the index of the matching close token
; ============================================================
(define find-matching-close
(fn (tokens start open-type close-type)
(find-matching-close-loop tokens start open-type close-type 1)))
(define find-matching-close-loop
(fn (tokens i open-type close-type depth)
(if (>= i (len tokens))
(len tokens)
(let ((tt (tok-type (nth tokens i))))
(cond
((= tt open-type)
(find-matching-close-loop tokens (+ i 1) open-type close-type (+ depth 1)))
((= tt close-type)
(if (= depth 1)
i
(find-matching-close-loop tokens (+ i 1) open-type close-type (- depth 1))))
(true
(find-matching-close-loop tokens (+ i 1) open-type close-type depth)))))))
; ============================================================
; Segment collection: scan tokens left-to-right, building
; a list of {:kind "val"/"fn" :node ast} segments.
; Operators following function glyphs are merged into
; derived-fn nodes during this pass.
; ============================================================
(define collect-segments
(fn (tokens)
(collect-segments-loop tokens 0 (list))))
(define collect-segments-loop
(fn (tokens i acc)
(if (>= i (len tokens))
acc
(let ((tok (nth tokens i))
(n (len tokens)))
(let ((tt (tok-type tok))
(tv (tok-val tok)))
(cond
; Skip separators
((or (= tt :diamond) (= tt :newline) (= tt :semi))
(collect-segments-loop tokens (+ i 1) acc))
; Number → value segment
((= tt :num)
(collect-segments-loop tokens (+ i 1)
(append acc {:kind "val" :node (list :num tv)})))
; String → value segment
((= tt :str)
(collect-segments-loop tokens (+ i 1)
(append acc {:kind "val" :node (list :str tv)})))
; Name → always a value segment in Phase 1
; (Named functions with operators like f/ are Phase 5)
((= tt :name)
(collect-segments-loop tokens (+ i 1)
(append acc {:kind "val" :node (list :name tv)})))
; Left paren → parse subexpression recursively
((= tt :lparen)
(let ((end (find-matching-close tokens (+ i 1) :lparen :rparen)))
(let ((inner-tokens (slice tokens (+ i 1) end))
(after (+ end 1)))
(collect-segments-loop tokens after
(append acc {:kind "val" :node (parse-apl-expr inner-tokens)})))))
; Left brace → dfn
((= tt :lbrace)
(let ((end (find-matching-close tokens (+ i 1) :lbrace :rbrace)))
(let ((inner-tokens (slice tokens (+ i 1) end))
(after (+ end 1)))
(collect-segments-loop tokens after
(append acc {:kind "fn" :node (parse-dfn inner-tokens)})))))
; Glyph token — need to classify
((= tt :glyph)
(cond
; Alpha () and Omega (⍵) → values inside dfn context
((or (= tv "") (= tv "⍵"))
(collect-segments-loop tokens (+ i 1)
(append acc {:kind "val" :node (list :name tv)})))
; Nabla (∇) → self-reference function in dfn context
((= tv "∇")
(collect-segments-loop tokens (+ i 1)
(append acc {:kind "fn" :node (list :fn-glyph "∇")})))
; ∘. → outer product (special case: ∘ followed by .)
((and (= tv "∘")
(< (+ i 1) n)
(= (tok-val (nth tokens (+ i 1))) "."))
(if (and (< (+ i 2) n) (is-fn-tok? (nth tokens (+ i 2))))
(let ((fn-tv (tok-val (nth tokens (+ i 2)))))
(let ((op-result (collect-ops tokens (+ i 3))))
(let ((ops (get op-result :ops))
(ni (get op-result :end)))
(let ((fn-node (build-derived-fn (list :fn-glyph fn-tv) ops)))
(collect-segments-loop tokens ni
(append acc {:kind "fn" :node (list :outer "∘." fn-node)}))))))
; ∘. without function — treat ∘ as plain compose operator
; skip the . and continue
(collect-segments-loop tokens (+ i 1)
acc)))
; Function glyph — collect following operators
((apl-parse-fn-glyph? tv)
(let ((op-result (collect-ops tokens (+ i 1))))
(let ((ops (get op-result :ops))
(ni (get op-result :end)))
; Check for inner product: fn . fn
; (ops = ("." ) and next token is also a function glyph)
(if (and (= (len ops) 1)
(= (first ops) ".")
(< ni n)
(is-fn-tok? (nth tokens ni)))
; f.g inner product
(let ((g-tv (tok-val (nth tokens ni))))
(let ((op-result2 (collect-ops tokens (+ ni 1))))
(let ((ops2 (get op-result2 :ops))
(ni2 (get op-result2 :end)))
(let ((g-node (build-derived-fn (list :fn-glyph g-tv) ops2)))
(collect-segments-loop tokens ni2
(append acc {:kind "fn"
:node (list :derived-fn2 "." (list :fn-glyph tv) g-node)}))))))
; Regular function with zero or more operator modifiers
(let ((fn-node (build-derived-fn (list :fn-glyph tv) ops)))
(collect-segments-loop tokens ni
(append acc {:kind "fn" :node fn-node})))))))
; Stray operator glyph — skip (shouldn't appear outside function context)
((apl-parse-op-glyph? tv)
(collect-segments-loop tokens (+ i 1) acc))
; Unknown glyph — skip
(true
(collect-segments-loop tokens (+ i 1) acc))))
; Skip unknown token types
(true
(collect-segments-loop tokens (+ i 1) acc))))))))
; ============================================================
; Build tree from segment list
;
; The segments are in left-to-right order.
; APL evaluates right-to-left, so the LEFTMOST function is
; the outermost (last-evaluated) node.
;
; Patterns:
; [val] → val node
; [fn val ...] → (:monad fn (build-tree rest))
; [val fn val ...] → (:dyad fn val (build-tree rest))
; [val val ...] → (:vec val1 val2 ...) — strand
; ============================================================
; Find the index of the first function segment (returns -1 if none)
(define find-first-fn
(fn (segs)
(find-first-fn-loop segs 0)))
(define find-first-fn-loop
(fn (segs i)
(if (>= i (len segs))
-1
(if (= (get (nth segs i) :kind) "fn")
i
(find-first-fn-loop segs (+ i 1))))))
; Build an array node from 0..n value segments
; If n=1 → return that segment's node
; If n>1 → return (:vec node1 node2 ...)
(define segs-to-array
(fn (segs)
(if (= (len segs) 1)
(get (first segs) :node)
(cons :vec (map (fn (s) (get s :node)) segs)))))
(define build-tree
(fn (segs)
(cond
; Empty → nil
((= (len segs) 0) nil)
; Single segment → return its node directly
((= (len segs) 1) (get (first segs) :node))
; All values → strand
((every? (fn (s) (= (get s :kind) "val")) segs)
(segs-to-array segs))
; Find the first function segment
(true
(let ((fn-idx (find-first-fn segs)))
(cond
; No function found (shouldn't happen given above checks) → strand
((= fn-idx -1) (segs-to-array segs))
; Function is first → monadic call
((= fn-idx 0)
(list :monad
(get (first segs) :node)
(build-tree (rest segs))))
; Function at position fn-idx: left args are segs[0..fn-idx-1]
(true
(let ((left-segs (slice segs 0 fn-idx))
(fn-seg (nth segs fn-idx))
(right-segs (slice segs (+ fn-idx 1))))
(list :dyad
(get fn-seg :node)
(segs-to-array left-segs)
(build-tree right-segs))))))))))
; ============================================================
; Split token list on statement separators (diamond / newline)
; Only splits at depth 0 (ignores separators inside { } or ( ) )
; ============================================================
(define split-statements
(fn (tokens)
(split-statements-loop tokens (list) (list) 0)))
(define split-statements-loop
(fn (tokens current-stmt acc depth)
(if (= (len tokens) 0)
(if (> (len current-stmt) 0)
(append acc (list current-stmt))
acc)
(let ((tok (first tokens))
(rest-toks (rest tokens))
(tt (tok-type (first tokens))))
(cond
; Open brackets increase depth
((or (= tt :lparen) (= tt :lbrace) (= tt :lbracket))
(split-statements-loop rest-toks (append current-stmt tok) acc (+ depth 1)))
; Close brackets decrease depth
((or (= tt :rparen) (= tt :rbrace) (= tt :rbracket))
(split-statements-loop rest-toks (append current-stmt tok) acc (- depth 1)))
; Separators only split at top level (depth = 0)
((and (> depth 0) (or (= tt :diamond) (= tt :newline)))
(split-statements-loop rest-toks (append current-stmt tok) acc depth))
((and (= depth 0) (or (= tt :diamond) (= tt :newline)))
(if (> (len current-stmt) 0)
(split-statements-loop rest-toks (list) (append acc (list current-stmt)) depth)
(split-statements-loop rest-toks (list) acc depth)))
; All other tokens go into current statement
(true
(split-statements-loop rest-toks (append current-stmt tok) acc depth)))))))
; ============================================================
; Parse a dfn body (tokens between { and })
; Handles guard expressions: cond : expr
; ============================================================
(define parse-dfn
(fn (tokens)
(let ((stmt-groups (split-statements tokens)))
(let ((stmts (map parse-dfn-stmt stmt-groups)))
(cons :dfn stmts)))))
(define parse-dfn-stmt
(fn (tokens)
; Check for guard: expr : expr
; A guard has a :colon token not inside parens/braces
(let ((colon-idx (find-top-level-colon tokens 0)))
(if (>= colon-idx 0)
; Guard: cond : expr
(let ((cond-tokens (slice tokens 0 colon-idx))
(body-tokens (slice tokens (+ colon-idx 1))))
(list :guard
(parse-apl-expr cond-tokens)
(parse-apl-expr body-tokens)))
; Regular statement
(parse-stmt tokens)))))
(define find-top-level-colon
(fn (tokens i)
(find-top-level-colon-loop tokens i 0)))
(define find-top-level-colon-loop
(fn (tokens i depth)
(if (>= i (len tokens))
-1
(let ((tok (nth tokens i))
(tt (tok-type (nth tokens i))))
(cond
((or (= tt :lparen) (= tt :lbrace) (= tt :lbracket))
(find-top-level-colon-loop tokens (+ i 1) (+ depth 1)))
((or (= tt :rparen) (= tt :rbrace) (= tt :rbracket))
(find-top-level-colon-loop tokens (+ i 1) (- depth 1)))
((and (= tt :colon) (= depth 0))
i)
(true
(find-top-level-colon-loop tokens (+ i 1) depth)))))))
; ============================================================
; Parse a single statement (assignment or expression)
; ============================================================
(define parse-stmt
(fn (tokens)
(if (and (>= (len tokens) 2)
(= (tok-type (nth tokens 0)) :name)
(= (tok-type (nth tokens 1)) :assign))
; Assignment: name ← expr
(list :assign
(tok-val (nth tokens 0))
(parse-apl-expr (slice tokens 2)))
; Expression
(parse-apl-expr tokens))))
; ============================================================
; Parse an expression from a flat token list
; ============================================================
(define parse-apl-expr
(fn (tokens)
(let ((segs (collect-segments tokens)))
(if (= (len segs) 0)
nil
(build-tree segs)))))
; ============================================================
; Main entry point
; parse-apl: string → AST
; ============================================================
(define parse-apl
(fn (src)
(let ((tokens (apl-tokenize src)))
(let ((stmt-groups (split-statements tokens)))
(if (= (len stmt-groups) 0)
nil
(if (= (len stmt-groups) 1)
(parse-stmt (first stmt-groups))
(cons :program (map parse-stmt stmt-groups))))))))