apl: right-to-left parser + 44 tests (Phase 1, step 2)
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 17s

Implement lib/apl/parser.sx — APL expression parser:
- Segment-based algorithm: scan L→R collecting {fn,val} segments
- build-tree constructs AST with leftmost-fn = root (right-to-left semantics)
- Handles: monadic/dyadic fns, strands (:vec), assignment (:assign)
- Operators: derived-fn (:derived-fn op fn), inner product (:derived-fn2)
- Outer product ∘.f (:outer), dfns {:dfn stmt...}, guards (:guard cond expr)
- split-statements is bracket-aware (depth tracking prevents splitting inside {})

44 new parser tests + 46 existing tokenizer = 90/90 green.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-26 14:05:43 +00:00
parent dbba2fe418
commit da8ba104a6
3 changed files with 696 additions and 2 deletions

436
lib/apl/parser.sx Normal file
View File

@@ -0,0 +1,436 @@
; APL Parser — right-to-left expression parser
;
; Takes a token list (output of apl-tokenize) and produces an AST.
; APL evaluates right-to-left with no precedence among functions.
; Operators bind to the function immediately to their left in the source.
;
; AST node types:
; (:num n) number literal
; (:str s) string literal
; (:vec n1 n2 ...) strand (juxtaposed literals)
; (:name "x") name reference / alpha / omega
; (:assign "x" expr) assignment x←expr
; (:monad fn arg) monadic function call
; (:dyad fn left right) dyadic function call
; (:derived-fn op fn) derived function: f/ f¨ f⍨
; (:derived-fn2 "." f g) inner product: f.g
; (:outer "∘." fn) outer product: ∘.f
; (:fn-glyph "") function reference
; (:fn-name "foo") named-function reference (dfn variable)
; (:dfn stmt...) {+⍵} anonymous function
; (:guard cond expr) cond:expr guard inside dfn
; (:program stmt...) multi-statement sequence
; ============================================================
; Glyph classification sets
; ============================================================
(define apl-parse-op-glyphs
(list "/" "\\" "¨" "⍨" "∘" "." "⍣" "⍤" "⍥" "@"))
(define apl-parse-fn-glyphs
(list "+" "-" "×" "÷" "*" "⍟" "⌈" "⌊" "|" "!" "?" "○" "~"
"<" "≤" "=" "≥" ">" "≠" "∊" "∧" "" "⍱" "⍲"
"," "⍪" "" "⌽" "⊖" "⍉" "↑" "↓" "⊂" "⊃" "⊆"
"" "∩" "" "⍸" "⌷" "⍋" "⍒" "⊥" "" "⊣" "⊢" "⍎" "⍕"))
(define apl-parse-op-glyph?
(fn (v)
(some (fn (g) (= g v)) apl-parse-op-glyphs)))
(define apl-parse-fn-glyph?
(fn (v)
(some (fn (g) (= g v)) apl-parse-fn-glyphs)))
; ============================================================
; Token accessors
; ============================================================
(define tok-type
(fn (tok)
(get tok :type)))
(define tok-val
(fn (tok)
(get tok :value)))
(define is-op-tok?
(fn (tok)
(and (= (tok-type tok) :glyph)
(apl-parse-op-glyph? (tok-val tok)))))
(define is-fn-tok?
(fn (tok)
(and (= (tok-type tok) :glyph)
(apl-parse-fn-glyph? (tok-val tok)))))
; ============================================================
; Collect trailing operators starting at index i
; Returns {:ops (op ...) :end new-i}
; ============================================================
(define collect-ops
(fn (tokens i)
(collect-ops-loop tokens i (list))))
(define collect-ops-loop
(fn (tokens i acc)
(if (>= i (len tokens))
{:ops acc :end i}
(let ((tok (nth tokens i)))
(if (is-op-tok? tok)
(collect-ops-loop tokens (+ i 1) (append acc (tok-val tok)))
{:ops acc :end i})))))
; ============================================================
; Build a derived-fn node by chaining operators left-to-right
; (+/¨ → (:derived-fn "¨" (:derived-fn "/" (:fn-glyph "+"))))
; ============================================================
(define build-derived-fn
(fn (fn-node ops)
(if (= (len ops) 0)
fn-node
(build-derived-fn
(list :derived-fn (first ops) fn-node)
(rest ops)))))
; ============================================================
; Find matching close bracket/paren/brace
; Returns the index of the matching close token
; ============================================================
(define find-matching-close
(fn (tokens start open-type close-type)
(find-matching-close-loop tokens start open-type close-type 1)))
(define find-matching-close-loop
(fn (tokens i open-type close-type depth)
(if (>= i (len tokens))
(len tokens)
(let ((tt (tok-type (nth tokens i))))
(cond
((= tt open-type)
(find-matching-close-loop tokens (+ i 1) open-type close-type (+ depth 1)))
((= tt close-type)
(if (= depth 1)
i
(find-matching-close-loop tokens (+ i 1) open-type close-type (- depth 1))))
(true
(find-matching-close-loop tokens (+ i 1) open-type close-type depth)))))))
; ============================================================
; Segment collection: scan tokens left-to-right, building
; a list of {:kind "val"/"fn" :node ast} segments.
; Operators following function glyphs are merged into
; derived-fn nodes during this pass.
; ============================================================
(define collect-segments
(fn (tokens)
(collect-segments-loop tokens 0 (list))))
(define collect-segments-loop
(fn (tokens i acc)
(if (>= i (len tokens))
acc
(let ((tok (nth tokens i))
(n (len tokens)))
(let ((tt (tok-type tok))
(tv (tok-val tok)))
(cond
; Skip separators
((or (= tt :diamond) (= tt :newline) (= tt :semi))
(collect-segments-loop tokens (+ i 1) acc))
; Number → value segment
((= tt :num)
(collect-segments-loop tokens (+ i 1)
(append acc {:kind "val" :node (list :num tv)})))
; String → value segment
((= tt :str)
(collect-segments-loop tokens (+ i 1)
(append acc {:kind "val" :node (list :str tv)})))
; Name → always a value segment in Phase 1
; (Named functions with operators like f/ are Phase 5)
((= tt :name)
(collect-segments-loop tokens (+ i 1)
(append acc {:kind "val" :node (list :name tv)})))
; Left paren → parse subexpression recursively
((= tt :lparen)
(let ((end (find-matching-close tokens (+ i 1) :lparen :rparen)))
(let ((inner-tokens (slice tokens (+ i 1) end))
(after (+ end 1)))
(collect-segments-loop tokens after
(append acc {:kind "val" :node (parse-apl-expr inner-tokens)})))))
; Left brace → dfn
((= tt :lbrace)
(let ((end (find-matching-close tokens (+ i 1) :lbrace :rbrace)))
(let ((inner-tokens (slice tokens (+ i 1) end))
(after (+ end 1)))
(collect-segments-loop tokens after
(append acc {:kind "fn" :node (parse-dfn inner-tokens)})))))
; Glyph token — need to classify
((= tt :glyph)
(cond
; Alpha () and Omega (⍵) → values inside dfn context
((or (= tv "") (= tv "⍵"))
(collect-segments-loop tokens (+ i 1)
(append acc {:kind "val" :node (list :name tv)})))
; Nabla (∇) → self-reference function in dfn context
((= tv "∇")
(collect-segments-loop tokens (+ i 1)
(append acc {:kind "fn" :node (list :fn-glyph "∇")})))
; ∘. → outer product (special case: ∘ followed by .)
((and (= tv "∘")
(< (+ i 1) n)
(= (tok-val (nth tokens (+ i 1))) "."))
(if (and (< (+ i 2) n) (is-fn-tok? (nth tokens (+ i 2))))
(let ((fn-tv (tok-val (nth tokens (+ i 2)))))
(let ((op-result (collect-ops tokens (+ i 3))))
(let ((ops (get op-result :ops))
(ni (get op-result :end)))
(let ((fn-node (build-derived-fn (list :fn-glyph fn-tv) ops)))
(collect-segments-loop tokens ni
(append acc {:kind "fn" :node (list :outer "∘." fn-node)}))))))
; ∘. without function — treat ∘ as plain compose operator
; skip the . and continue
(collect-segments-loop tokens (+ i 1)
acc)))
; Function glyph — collect following operators
((apl-parse-fn-glyph? tv)
(let ((op-result (collect-ops tokens (+ i 1))))
(let ((ops (get op-result :ops))
(ni (get op-result :end)))
; Check for inner product: fn . fn
; (ops = ("." ) and next token is also a function glyph)
(if (and (= (len ops) 1)
(= (first ops) ".")
(< ni n)
(is-fn-tok? (nth tokens ni)))
; f.g inner product
(let ((g-tv (tok-val (nth tokens ni))))
(let ((op-result2 (collect-ops tokens (+ ni 1))))
(let ((ops2 (get op-result2 :ops))
(ni2 (get op-result2 :end)))
(let ((g-node (build-derived-fn (list :fn-glyph g-tv) ops2)))
(collect-segments-loop tokens ni2
(append acc {:kind "fn"
:node (list :derived-fn2 "." (list :fn-glyph tv) g-node)}))))))
; Regular function with zero or more operator modifiers
(let ((fn-node (build-derived-fn (list :fn-glyph tv) ops)))
(collect-segments-loop tokens ni
(append acc {:kind "fn" :node fn-node})))))))
; Stray operator glyph — skip (shouldn't appear outside function context)
((apl-parse-op-glyph? tv)
(collect-segments-loop tokens (+ i 1) acc))
; Unknown glyph — skip
(true
(collect-segments-loop tokens (+ i 1) acc))))
; Skip unknown token types
(true
(collect-segments-loop tokens (+ i 1) acc))))))))
; ============================================================
; Build tree from segment list
;
; The segments are in left-to-right order.
; APL evaluates right-to-left, so the LEFTMOST function is
; the outermost (last-evaluated) node.
;
; Patterns:
; [val] → val node
; [fn val ...] → (:monad fn (build-tree rest))
; [val fn val ...] → (:dyad fn val (build-tree rest))
; [val val ...] → (:vec val1 val2 ...) — strand
; ============================================================
; Find the index of the first function segment (returns -1 if none)
(define find-first-fn
(fn (segs)
(find-first-fn-loop segs 0)))
(define find-first-fn-loop
(fn (segs i)
(if (>= i (len segs))
-1
(if (= (get (nth segs i) :kind) "fn")
i
(find-first-fn-loop segs (+ i 1))))))
; Build an array node from 0..n value segments
; If n=1 → return that segment's node
; If n>1 → return (:vec node1 node2 ...)
(define segs-to-array
(fn (segs)
(if (= (len segs) 1)
(get (first segs) :node)
(cons :vec (map (fn (s) (get s :node)) segs)))))
(define build-tree
(fn (segs)
(cond
; Empty → nil
((= (len segs) 0) nil)
; Single segment → return its node directly
((= (len segs) 1) (get (first segs) :node))
; All values → strand
((every? (fn (s) (= (get s :kind) "val")) segs)
(segs-to-array segs))
; Find the first function segment
(true
(let ((fn-idx (find-first-fn segs)))
(cond
; No function found (shouldn't happen given above checks) → strand
((= fn-idx -1) (segs-to-array segs))
; Function is first → monadic call
((= fn-idx 0)
(list :monad
(get (first segs) :node)
(build-tree (rest segs))))
; Function at position fn-idx: left args are segs[0..fn-idx-1]
(true
(let ((left-segs (slice segs 0 fn-idx))
(fn-seg (nth segs fn-idx))
(right-segs (slice segs (+ fn-idx 1))))
(list :dyad
(get fn-seg :node)
(segs-to-array left-segs)
(build-tree right-segs))))))))))
; ============================================================
; Split token list on statement separators (diamond / newline)
; Only splits at depth 0 (ignores separators inside { } or ( ) )
; ============================================================
(define split-statements
(fn (tokens)
(split-statements-loop tokens (list) (list) 0)))
(define split-statements-loop
(fn (tokens current-stmt acc depth)
(if (= (len tokens) 0)
(if (> (len current-stmt) 0)
(append acc (list current-stmt))
acc)
(let ((tok (first tokens))
(rest-toks (rest tokens))
(tt (tok-type (first tokens))))
(cond
; Open brackets increase depth
((or (= tt :lparen) (= tt :lbrace) (= tt :lbracket))
(split-statements-loop rest-toks (append current-stmt tok) acc (+ depth 1)))
; Close brackets decrease depth
((or (= tt :rparen) (= tt :rbrace) (= tt :rbracket))
(split-statements-loop rest-toks (append current-stmt tok) acc (- depth 1)))
; Separators only split at top level (depth = 0)
((and (> depth 0) (or (= tt :diamond) (= tt :newline)))
(split-statements-loop rest-toks (append current-stmt tok) acc depth))
((and (= depth 0) (or (= tt :diamond) (= tt :newline)))
(if (> (len current-stmt) 0)
(split-statements-loop rest-toks (list) (append acc (list current-stmt)) depth)
(split-statements-loop rest-toks (list) acc depth)))
; All other tokens go into current statement
(true
(split-statements-loop rest-toks (append current-stmt tok) acc depth)))))))
; ============================================================
; Parse a dfn body (tokens between { and })
; Handles guard expressions: cond : expr
; ============================================================
(define parse-dfn
(fn (tokens)
(let ((stmt-groups (split-statements tokens)))
(let ((stmts (map parse-dfn-stmt stmt-groups)))
(cons :dfn stmts)))))
(define parse-dfn-stmt
(fn (tokens)
; Check for guard: expr : expr
; A guard has a :colon token not inside parens/braces
(let ((colon-idx (find-top-level-colon tokens 0)))
(if (>= colon-idx 0)
; Guard: cond : expr
(let ((cond-tokens (slice tokens 0 colon-idx))
(body-tokens (slice tokens (+ colon-idx 1))))
(list :guard
(parse-apl-expr cond-tokens)
(parse-apl-expr body-tokens)))
; Regular statement
(parse-stmt tokens)))))
(define find-top-level-colon
(fn (tokens i)
(find-top-level-colon-loop tokens i 0)))
(define find-top-level-colon-loop
(fn (tokens i depth)
(if (>= i (len tokens))
-1
(let ((tok (nth tokens i))
(tt (tok-type (nth tokens i))))
(cond
((or (= tt :lparen) (= tt :lbrace) (= tt :lbracket))
(find-top-level-colon-loop tokens (+ i 1) (+ depth 1)))
((or (= tt :rparen) (= tt :rbrace) (= tt :rbracket))
(find-top-level-colon-loop tokens (+ i 1) (- depth 1)))
((and (= tt :colon) (= depth 0))
i)
(true
(find-top-level-colon-loop tokens (+ i 1) depth)))))))
; ============================================================
; Parse a single statement (assignment or expression)
; ============================================================
(define parse-stmt
(fn (tokens)
(if (and (>= (len tokens) 2)
(= (tok-type (nth tokens 0)) :name)
(= (tok-type (nth tokens 1)) :assign))
; Assignment: name ← expr
(list :assign
(tok-val (nth tokens 0))
(parse-apl-expr (slice tokens 2)))
; Expression
(parse-apl-expr tokens))))
; ============================================================
; Parse an expression from a flat token list
; ============================================================
(define parse-apl-expr
(fn (tokens)
(let ((segs (collect-segments tokens)))
(if (= (len segs) 0)
nil
(build-tree segs)))))
; ============================================================
; Main entry point
; parse-apl: string → AST
; ============================================================
(define parse-apl
(fn (src)
(let ((tokens (apl-tokenize src)))
(let ((stmt-groups (split-statements tokens)))
(if (= (len stmt-groups) 0)
nil
(if (= (len stmt-groups) 1)
(parse-stmt (first stmt-groups))
(cons :program (map parse-stmt stmt-groups))))))))