Compare commits
15 Commits
loops/mini
...
loops/ocam
| Author | SHA1 | Date | |
|---|---|---|---|
| 9a090c6e42 | |||
| 85b7fed4fc | |||
| 1eb9d0f8d2 | |||
| f182d04e6a | |||
| ab2c40c14c | |||
| d3c34b46b9 | |||
| 80dac0051d | |||
| b661318a45 | |||
| 47d9d07f2e | |||
| d75c61d408 | |||
| a677585639 | |||
| c04f38a1ba | |||
| b13819c50c | |||
| d9cf00f287 | |||
| 0c0ed0605a |
@@ -25,8 +25,9 @@
|
||||
; Glyph classification sets
|
||||
; ============================================================
|
||||
|
||||
(define apl-parse-op-glyphs
|
||||
(list "/" "\\" "¨" "⍨" "∘" "." "⍣" "⍤" "⍥" "@"))
|
||||
(define
|
||||
apl-parse-op-glyphs
|
||||
(list "/" "⌿" "\\" "⍀" "¨" "⍨" "∘" "." "⍣" "⍤" "⍥" "@"))
|
||||
|
||||
(define
|
||||
apl-parse-fn-glyphs
|
||||
@@ -82,22 +83,48 @@
|
||||
"⍎"
|
||||
"⍕"))
|
||||
|
||||
(define apl-quad-fn-names (list "⎕FMT"))
|
||||
(define apl-quad-fn-names (list "⎕FMT" "⎕←"))
|
||||
|
||||
(define
|
||||
apl-parse-op-glyph?
|
||||
(fn (v) (some (fn (g) (= g v)) apl-parse-op-glyphs)))
|
||||
(define apl-known-fn-names (list))
|
||||
|
||||
; ============================================================
|
||||
; Token accessors
|
||||
; ============================================================
|
||||
|
||||
(define
|
||||
apl-collect-fn-bindings
|
||||
(fn
|
||||
(stmt-groups)
|
||||
(set! apl-known-fn-names (list))
|
||||
(for-each
|
||||
(fn
|
||||
(toks)
|
||||
(when
|
||||
(and
|
||||
(>= (len toks) 3)
|
||||
(= (tok-type (nth toks 0)) :name)
|
||||
(= (tok-type (nth toks 1)) :assign)
|
||||
(= (tok-type (nth toks 2)) :lbrace))
|
||||
(set!
|
||||
apl-known-fn-names
|
||||
(cons (tok-val (nth toks 0)) apl-known-fn-names))))
|
||||
stmt-groups)))
|
||||
|
||||
(define
|
||||
apl-parse-op-glyph?
|
||||
(fn (v) (some (fn (g) (= g v)) apl-parse-op-glyphs)))
|
||||
|
||||
(define
|
||||
apl-parse-fn-glyph?
|
||||
(fn (v) (some (fn (g) (= g v)) apl-parse-fn-glyphs)))
|
||||
|
||||
(define tok-type (fn (tok) (get tok :type)))
|
||||
|
||||
; ============================================================
|
||||
; Collect trailing operators starting at index i
|
||||
; Returns {:ops (op ...) :end new-i}
|
||||
; ============================================================
|
||||
|
||||
(define tok-val (fn (tok) (get tok :value)))
|
||||
|
||||
(define
|
||||
@@ -107,8 +134,8 @@
|
||||
(and (= (tok-type tok) :glyph) (apl-parse-op-glyph? (tok-val tok)))))
|
||||
|
||||
; ============================================================
|
||||
; Collect trailing operators starting at index i
|
||||
; Returns {:ops (op ...) :end new-i}
|
||||
; Build a derived-fn node by chaining operators left-to-right
|
||||
; (+/¨ → (:derived-fn "¨" (:derived-fn "/" (:fn-glyph "+"))))
|
||||
; ============================================================
|
||||
|
||||
(define
|
||||
@@ -119,15 +146,17 @@
|
||||
(and (= (tok-type tok) :glyph) (apl-parse-fn-glyph? (tok-val tok)))
|
||||
(and
|
||||
(= (tok-type tok) :name)
|
||||
(some (fn (q) (= q (tok-val tok))) apl-quad-fn-names)))))
|
||||
(or
|
||||
(some (fn (q) (= q (tok-val tok))) apl-quad-fn-names)
|
||||
(some (fn (q) (= q (tok-val tok))) apl-known-fn-names))))))
|
||||
|
||||
; ============================================================
|
||||
; Find matching close bracket/paren/brace
|
||||
; Returns the index of the matching close token
|
||||
; ============================================================
|
||||
|
||||
(define collect-ops (fn (tokens i) (collect-ops-loop tokens i (list))))
|
||||
|
||||
; ============================================================
|
||||
; Build a derived-fn node by chaining operators left-to-right
|
||||
; (+/¨ → (:derived-fn "¨" (:derived-fn "/" (:fn-glyph "+"))))
|
||||
; ============================================================
|
||||
|
||||
(define
|
||||
collect-ops-loop
|
||||
(fn
|
||||
@@ -143,8 +172,10 @@
|
||||
{:end i :ops acc})))))
|
||||
|
||||
; ============================================================
|
||||
; Find matching close bracket/paren/brace
|
||||
; Returns the index of the matching close token
|
||||
; Segment collection: scan tokens left-to-right, building
|
||||
; a list of {:kind "val"/"fn" :node ast} segments.
|
||||
; Operators following function glyphs are merged into
|
||||
; derived-fn nodes during this pass.
|
||||
; ============================================================
|
||||
|
||||
(define
|
||||
@@ -163,12 +194,20 @@
|
||||
(find-matching-close-loop tokens start open-type close-type 1)))
|
||||
|
||||
; ============================================================
|
||||
; Segment collection: scan tokens left-to-right, building
|
||||
; a list of {:kind "val"/"fn" :node ast} segments.
|
||||
; Operators following function glyphs are merged into
|
||||
; derived-fn nodes during this pass.
|
||||
; Build tree from segment list
|
||||
;
|
||||
; The segments are in left-to-right order.
|
||||
; APL evaluates right-to-left, so the LEFTMOST function is
|
||||
; the outermost (last-evaluated) node.
|
||||
;
|
||||
; Patterns:
|
||||
; [val] → val node
|
||||
; [fn val ...] → (:monad fn (build-tree rest))
|
||||
; [val fn val ...] → (:dyad fn val (build-tree rest))
|
||||
; [val val ...] → (:vec val1 val2 ...) — strand
|
||||
; ============================================================
|
||||
|
||||
; Find the index of the first function segment (returns -1 if none)
|
||||
(define
|
||||
find-matching-close-loop
|
||||
(fn
|
||||
@@ -208,21 +247,9 @@
|
||||
collect-segments
|
||||
(fn (tokens) (collect-segments-loop tokens 0 (list))))
|
||||
|
||||
; ============================================================
|
||||
; Build tree from segment list
|
||||
;
|
||||
; The segments are in left-to-right order.
|
||||
; APL evaluates right-to-left, so the LEFTMOST function is
|
||||
; the outermost (last-evaluated) node.
|
||||
;
|
||||
; Patterns:
|
||||
; [val] → val node
|
||||
; [fn val ...] → (:monad fn (build-tree rest))
|
||||
; [val fn val ...] → (:dyad fn val (build-tree rest))
|
||||
; [val val ...] → (:vec val1 val2 ...) — strand
|
||||
; ============================================================
|
||||
|
||||
; Find the index of the first function segment (returns -1 if none)
|
||||
; Build an array node from 0..n value segments
|
||||
; If n=1 → return that segment's node
|
||||
; If n>1 → return (:vec node1 node2 ...)
|
||||
(define
|
||||
collect-segments-loop
|
||||
(fn
|
||||
@@ -242,24 +269,38 @@
|
||||
((= tt :str)
|
||||
(collect-segments-loop tokens (+ i 1) (append acc {:kind "val" :node (list :str tv)})))
|
||||
((= tt :name)
|
||||
(if
|
||||
(some (fn (q) (= q tv)) apl-quad-fn-names)
|
||||
(let
|
||||
((op-result (collect-ops tokens (+ i 1))))
|
||||
(cond
|
||||
((some (fn (q) (= q tv)) apl-quad-fn-names)
|
||||
(let
|
||||
((ops (get op-result :ops)) (ni (get op-result :end)))
|
||||
((op-result (collect-ops tokens (+ i 1))))
|
||||
(let
|
||||
((fn-node (build-derived-fn (list :fn-glyph tv) ops)))
|
||||
(collect-segments-loop
|
||||
tokens
|
||||
ni
|
||||
(append acc {:kind "fn" :node fn-node})))))
|
||||
(let
|
||||
((br (maybe-bracket (list :name tv) tokens (+ i 1))))
|
||||
(collect-segments-loop
|
||||
tokens
|
||||
(nth br 1)
|
||||
(append acc {:kind "val" :node (nth br 0)})))))
|
||||
((ops (get op-result :ops))
|
||||
(ni (get op-result :end)))
|
||||
(let
|
||||
((fn-node (build-derived-fn (list :fn-glyph tv) ops)))
|
||||
(collect-segments-loop
|
||||
tokens
|
||||
ni
|
||||
(append acc {:kind "fn" :node fn-node}))))))
|
||||
((some (fn (q) (= q tv)) apl-known-fn-names)
|
||||
(let
|
||||
((op-result (collect-ops tokens (+ i 1))))
|
||||
(let
|
||||
((ops (get op-result :ops))
|
||||
(ni (get op-result :end)))
|
||||
(let
|
||||
((fn-node (build-derived-fn (list :fn-name tv) ops)))
|
||||
(collect-segments-loop
|
||||
tokens
|
||||
ni
|
||||
(append acc {:kind "fn" :node fn-node}))))))
|
||||
(else
|
||||
(let
|
||||
((br (maybe-bracket (list :name tv) tokens (+ i 1))))
|
||||
(collect-segments-loop
|
||||
tokens
|
||||
(nth br 1)
|
||||
(append acc {:kind "val" :node (nth br 0)}))))))
|
||||
((= tt :lparen)
|
||||
(let
|
||||
((end (find-matching-close tokens (+ i 1) :lparen :rparen)))
|
||||
@@ -267,11 +308,23 @@
|
||||
((inner-tokens (slice tokens (+ i 1) end))
|
||||
(after (+ end 1)))
|
||||
(let
|
||||
((br (maybe-bracket (parse-apl-expr inner-tokens) tokens after)))
|
||||
(collect-segments-loop
|
||||
tokens
|
||||
(nth br 1)
|
||||
(append acc {:kind "val" :node (nth br 0)}))))))
|
||||
((inner-segs (collect-segments inner-tokens)))
|
||||
(if
|
||||
(and
|
||||
(>= (len inner-segs) 2)
|
||||
(every? (fn (s) (= (get s :kind) "fn")) inner-segs))
|
||||
(let
|
||||
((train-node (cons :train (map (fn (s) (get s :node)) inner-segs))))
|
||||
(collect-segments-loop
|
||||
tokens
|
||||
after
|
||||
(append acc {:kind "fn" :node train-node})))
|
||||
(let
|
||||
((br (maybe-bracket (parse-apl-expr inner-tokens) tokens after)))
|
||||
(collect-segments-loop
|
||||
tokens
|
||||
(nth br 1)
|
||||
(append acc {:kind "val" :node (nth br 0)}))))))))
|
||||
((= tt :lbrace)
|
||||
(let
|
||||
((end (find-matching-close tokens (+ i 1) :lbrace :rbrace)))
|
||||
@@ -346,9 +399,12 @@
|
||||
|
||||
(define find-first-fn (fn (segs) (find-first-fn-loop segs 0)))
|
||||
|
||||
; Build an array node from 0..n value segments
|
||||
; If n=1 → return that segment's node
|
||||
; If n>1 → return (:vec node1 node2 ...)
|
||||
|
||||
; ============================================================
|
||||
; Split token list on statement separators (diamond / newline)
|
||||
; Only splits at depth 0 (ignores separators inside { } or ( ) )
|
||||
; ============================================================
|
||||
|
||||
(define
|
||||
find-first-fn-loop
|
||||
(fn
|
||||
@@ -370,10 +426,9 @@
|
||||
(get (first segs) :node)
|
||||
(cons :vec (map (fn (s) (get s :node)) segs)))))
|
||||
|
||||
|
||||
; ============================================================
|
||||
; Split token list on statement separators (diamond / newline)
|
||||
; Only splits at depth 0 (ignores separators inside { } or ( ) )
|
||||
; Parse a dfn body (tokens between { and })
|
||||
; Handles guard expressions: cond : expr
|
||||
; ============================================================
|
||||
|
||||
(define
|
||||
@@ -408,11 +463,6 @@
|
||||
split-statements
|
||||
(fn (tokens) (split-statements-loop tokens (list) (list) 0)))
|
||||
|
||||
; ============================================================
|
||||
; Parse a dfn body (tokens between { and })
|
||||
; Handles guard expressions: cond : expr
|
||||
; ============================================================
|
||||
|
||||
(define
|
||||
split-statements-loop
|
||||
(fn
|
||||
@@ -467,6 +517,10 @@
|
||||
((stmt-groups (split-statements tokens)))
|
||||
(let ((stmts (map parse-dfn-stmt stmt-groups))) (cons :dfn stmts)))))
|
||||
|
||||
; ============================================================
|
||||
; Parse a single statement (assignment or expression)
|
||||
; ============================================================
|
||||
|
||||
(define
|
||||
parse-dfn-stmt
|
||||
(fn
|
||||
@@ -483,12 +537,17 @@
|
||||
(parse-apl-expr body-tokens)))
|
||||
(parse-stmt tokens)))))
|
||||
|
||||
; ============================================================
|
||||
; Parse an expression from a flat token list
|
||||
; ============================================================
|
||||
|
||||
(define
|
||||
find-top-level-colon
|
||||
(fn (tokens i) (find-top-level-colon-loop tokens i 0)))
|
||||
|
||||
; ============================================================
|
||||
; Parse a single statement (assignment or expression)
|
||||
; Main entry point
|
||||
; parse-apl: string → AST
|
||||
; ============================================================
|
||||
|
||||
(define
|
||||
@@ -508,10 +567,6 @@
|
||||
((and (= tt :colon) (= depth 0)) i)
|
||||
(true (find-top-level-colon-loop tokens (+ i 1) depth)))))))
|
||||
|
||||
; ============================================================
|
||||
; Parse an expression from a flat token list
|
||||
; ============================================================
|
||||
|
||||
(define
|
||||
parse-stmt
|
||||
(fn
|
||||
@@ -526,11 +581,6 @@
|
||||
(parse-apl-expr (slice tokens 2)))
|
||||
(parse-apl-expr tokens))))
|
||||
|
||||
; ============================================================
|
||||
; Main entry point
|
||||
; parse-apl: string → AST
|
||||
; ============================================================
|
||||
|
||||
(define
|
||||
parse-apl-expr
|
||||
(fn
|
||||
@@ -547,13 +597,52 @@
|
||||
((tokens (apl-tokenize src)))
|
||||
(let
|
||||
((stmt-groups (split-statements tokens)))
|
||||
(if
|
||||
(= (len stmt-groups) 0)
|
||||
nil
|
||||
(begin
|
||||
(apl-collect-fn-bindings stmt-groups)
|
||||
(if
|
||||
(= (len stmt-groups) 1)
|
||||
(parse-stmt (first stmt-groups))
|
||||
(cons :program (map parse-stmt stmt-groups))))))))
|
||||
(= (len stmt-groups) 0)
|
||||
nil
|
||||
(if
|
||||
(= (len stmt-groups) 1)
|
||||
(parse-stmt (first stmt-groups))
|
||||
(cons :program (map parse-stmt stmt-groups)))))))))
|
||||
|
||||
(define
|
||||
split-bracket-loop
|
||||
(fn
|
||||
(tokens current acc depth)
|
||||
(if
|
||||
(= (len tokens) 0)
|
||||
(append acc (list current))
|
||||
(let
|
||||
((tok (first tokens)) (more (rest tokens)))
|
||||
(let
|
||||
((tt (tok-type tok)))
|
||||
(cond
|
||||
((or (= tt :lparen) (= tt :lbrace) (= tt :lbracket))
|
||||
(split-bracket-loop
|
||||
more
|
||||
(append current (list tok))
|
||||
acc
|
||||
(+ depth 1)))
|
||||
((or (= tt :rparen) (= tt :rbrace) (= tt :rbracket))
|
||||
(split-bracket-loop
|
||||
more
|
||||
(append current (list tok))
|
||||
acc
|
||||
(- depth 1)))
|
||||
((and (= tt :semi) (= depth 0))
|
||||
(split-bracket-loop
|
||||
more
|
||||
(list)
|
||||
(append acc (list current))
|
||||
depth))
|
||||
(else
|
||||
(split-bracket-loop more (append current (list tok)) acc depth))))))))
|
||||
|
||||
(define
|
||||
split-bracket-content
|
||||
(fn (tokens) (split-bracket-loop tokens (list) (list) 0)))
|
||||
|
||||
(define
|
||||
maybe-bracket
|
||||
@@ -569,8 +658,17 @@
|
||||
((inner-tokens (slice tokens (+ after 1) end))
|
||||
(next-after (+ end 1)))
|
||||
(let
|
||||
((idx-expr (parse-apl-expr inner-tokens)))
|
||||
(let
|
||||
((indexed (list :dyad (list :fn-glyph "⌷") idx-expr val-node)))
|
||||
(maybe-bracket indexed tokens next-after)))))
|
||||
((sections (split-bracket-content inner-tokens)))
|
||||
(if
|
||||
(= (len sections) 1)
|
||||
(let
|
||||
((idx-expr (parse-apl-expr inner-tokens)))
|
||||
(let
|
||||
((indexed (list :dyad (list :fn-glyph "⌷") idx-expr val-node)))
|
||||
(maybe-bracket indexed tokens next-after)))
|
||||
(let
|
||||
((axis-exprs (map (fn (toks) (if (= (len toks) 0) :all (parse-apl-expr toks))) sections)))
|
||||
(let
|
||||
((indexed (cons :bracket (cons val-node axis-exprs))))
|
||||
(maybe-bracket indexed tokens next-after)))))))
|
||||
(list val-node after))))
|
||||
|
||||
@@ -883,7 +883,7 @@
|
||||
(let
|
||||
((sub (apl-permutations (- n 1))))
|
||||
(reduce
|
||||
(fn (acc p) (append acc (apl-insert-everywhere n p)))
|
||||
(fn (acc p) (append (apl-insert-everywhere n p) acc))
|
||||
(list)
|
||||
sub)))))
|
||||
|
||||
@@ -985,6 +985,38 @@
|
||||
(some (fn (c) (= c 0)) codes)
|
||||
(some (fn (c) (= c (nth e 1))) codes)))))
|
||||
|
||||
(define
|
||||
apl-cartesian
|
||||
(fn
|
||||
(lists)
|
||||
(if
|
||||
(= (len lists) 0)
|
||||
(list (list))
|
||||
(let
|
||||
((rest-prods (apl-cartesian (rest lists))))
|
||||
(reduce
|
||||
(fn (acc x) (append acc (map (fn (p) (cons x p)) rest-prods)))
|
||||
(list)
|
||||
(first lists))))))
|
||||
|
||||
(define
|
||||
apl-bracket-multi
|
||||
(fn
|
||||
(axes arr)
|
||||
(let
|
||||
((shape (get arr :shape)) (ravel (get arr :ravel)))
|
||||
(let
|
||||
((rank (len shape)) (strides (apl-strides shape)))
|
||||
(let
|
||||
((axis-info (map (fn (i) (let ((a (nth axes i))) (cond ((= a nil) {:idxs (range 0 (nth shape i)) :scalar? false}) ((= (len (get a :shape)) 0) {:idxs (list (- (first (get a :ravel)) apl-io)) :scalar? true}) (else {:idxs (map (fn (x) (- x apl-io)) (get a :ravel)) :scalar? false})))) (range 0 rank))))
|
||||
(let
|
||||
((cells (apl-cartesian (map (fn (a) (get a :idxs)) axis-info))))
|
||||
(let
|
||||
((result-ravel (map (fn (cell) (let ((flat (reduce + 0 (map (fn (i) (* (nth cell i) (nth strides i))) (range 0 rank))))) (nth ravel flat))) cells)))
|
||||
(let
|
||||
((result-shape (filter (fn (x) (>= x 0)) (map (fn (i) (let ((a (nth axis-info i))) (if (get a :scalar?) -1 (len (get a :idxs))))) (range 0 rank)))))
|
||||
(make-array result-shape result-ravel)))))))))
|
||||
|
||||
(define
|
||||
apl-reduce
|
||||
(fn
|
||||
|
||||
@@ -39,6 +39,7 @@ cat > "$TMPFILE" << 'EPOCHS'
|
||||
(load "lib/apl/tests/idioms.sx")
|
||||
(load "lib/apl/tests/eval-ops.sx")
|
||||
(load "lib/apl/tests/pipeline.sx")
|
||||
(load "lib/apl/tests/programs-e2e.sx")
|
||||
(epoch 4)
|
||||
(eval "(list apl-test-pass apl-test-fail)")
|
||||
EPOCHS
|
||||
|
||||
@@ -178,3 +178,137 @@
|
||||
"apl-run \"(⍳5)[3] × 7\" → 21"
|
||||
(mkrv (apl-run "(⍳5)[3] × 7"))
|
||||
(list 21))
|
||||
|
||||
(apl-test "decimal: 3.7 → 3.7" (mkrv (apl-run "3.7")) (list 3.7))
|
||||
|
||||
(apl-test "decimal: ¯2.5 → -2.5" (mkrv (apl-run "¯2.5")) (list -2.5))
|
||||
|
||||
(apl-test "decimal: 1.5 + 2.5 → 4" (mkrv (apl-run "1.5 + 2.5")) (list 4))
|
||||
|
||||
(apl-test "decimal: ⌊3.7 → 3" (mkrv (apl-run "⌊ 3.7")) (list 3))
|
||||
|
||||
(apl-test "decimal: ⌈3.7 → 4" (mkrv (apl-run "⌈ 3.7")) (list 4))
|
||||
|
||||
(apl-test
|
||||
"⎕← scalar passthrough"
|
||||
(mkrv (apl-run "⎕← 42"))
|
||||
(list 42))
|
||||
|
||||
(apl-test
|
||||
"⎕← vector passthrough"
|
||||
(mkrv (apl-run "⎕← 1 2 3"))
|
||||
(list 1 2 3))
|
||||
|
||||
(apl-test
|
||||
"string: 'abc' → 3-char vector"
|
||||
(mkrv (apl-run "'abc'"))
|
||||
(list "a" "b" "c"))
|
||||
|
||||
(apl-test "string: 'a' is rank-0 scalar" (mksh (apl-run "'a'")) (list))
|
||||
|
||||
(apl-test "string: 'hello' shape (5)" (mksh (apl-run "'hello'")) (list 5))
|
||||
|
||||
(apl-test
|
||||
"named-fn: f ← {⍺+⍵} ⋄ 3 f 4 → 7"
|
||||
(mkrv (apl-run "f ← {⍺+⍵} ⋄ 3 f 4"))
|
||||
(list 7))
|
||||
|
||||
(apl-test
|
||||
"named-fn monadic: sq ← {⍵×⍵} ⋄ sq 7 → 49"
|
||||
(mkrv (apl-run "sq ← {⍵×⍵} ⋄ sq 7"))
|
||||
(list 49))
|
||||
|
||||
(apl-test
|
||||
"named-fn dyadic: hyp ← {((⍺×⍺)+⍵×⍵)} ⋄ 3 hyp 4 → 25"
|
||||
(mkrv (apl-run "hyp ← {((⍺×⍺)+⍵×⍵)} ⋄ 3 hyp 4"))
|
||||
(list 25))
|
||||
|
||||
(apl-test
|
||||
"named-fn: dbl ← {⍵+⍵} ⋄ dbl ⍳5"
|
||||
(mkrv (apl-run "dbl ← {⍵+⍵} ⋄ dbl ⍳5"))
|
||||
(list 2 4 6 8 10))
|
||||
|
||||
(apl-test
|
||||
"named-fn factorial via ∇ recursion"
|
||||
(mkrv (apl-run "fact ← {0=⍵:1 ⋄ ⍵×∇⍵-1} ⋄ fact 5"))
|
||||
(list 120))
|
||||
|
||||
(apl-test
|
||||
"named-fn used twice in expr: dbl ← {⍵+⍵} ⋄ (dbl 3) + dbl 4"
|
||||
(mkrv (apl-run "dbl ← {⍵+⍵} ⋄ (dbl 3) + dbl 4"))
|
||||
(list 14))
|
||||
|
||||
(apl-test
|
||||
"named-fn with vector arg: neg ← {-⍵} ⋄ neg 1 2 3"
|
||||
(mkrv (apl-run "neg ← {-⍵} ⋄ neg 1 2 3"))
|
||||
(list -1 -2 -3))
|
||||
|
||||
(apl-test
|
||||
"multi-axis: M[2;2] → center"
|
||||
(mkrv (apl-run "M ← (3 3) ⍴ ⍳9 ⋄ M[2;2]"))
|
||||
(list 5))
|
||||
|
||||
(apl-test
|
||||
"multi-axis: M[1;] → first row"
|
||||
(mkrv (apl-run "M ← (3 3) ⍴ ⍳9 ⋄ M[1;]"))
|
||||
(list 1 2 3))
|
||||
|
||||
(apl-test
|
||||
"multi-axis: M[;2] → second column"
|
||||
(mkrv (apl-run "M ← (3 3) ⍴ ⍳9 ⋄ M[;2]"))
|
||||
(list 2 5 8))
|
||||
|
||||
(apl-test
|
||||
"multi-axis: M[1 2;1 2] → 2x2 block"
|
||||
(mkrv (apl-run "M ← (2 3) ⍴ ⍳6 ⋄ M[1 2;1 2]"))
|
||||
(list 1 2 4 5))
|
||||
|
||||
(apl-test
|
||||
"multi-axis: M[1 2;1 2] shape (2 2)"
|
||||
(mksh (apl-run "M ← (2 3) ⍴ ⍳6 ⋄ M[1 2;1 2]"))
|
||||
(list 2 2))
|
||||
|
||||
(apl-test
|
||||
"multi-axis: M[;] full matrix"
|
||||
(mkrv (apl-run "M ← (2 2) ⍴ 10 20 30 40 ⋄ M[;]"))
|
||||
(list 10 20 30 40))
|
||||
|
||||
(apl-test
|
||||
"multi-axis: M[1;] shape collapsed"
|
||||
(mksh (apl-run "M ← (3 3) ⍴ ⍳9 ⋄ M[1;]"))
|
||||
(list 3))
|
||||
|
||||
(apl-test
|
||||
"multi-axis: select all rows of column 3"
|
||||
(mkrv (apl-run "M ← (4 3) ⍴ 1 2 3 4 5 6 7 8 9 10 11 12 ⋄ M[;3]"))
|
||||
(list 3 6 9 12))
|
||||
|
||||
(apl-test
|
||||
"train: mean = (+/÷≢) on 1..5"
|
||||
(mkrv (apl-run "(+/÷≢) 1 2 3 4 5"))
|
||||
(list 3))
|
||||
|
||||
(apl-test
|
||||
"train: mean of 2 4 6 8 10"
|
||||
(mkrv (apl-run "(+/÷≢) 2 4 6 8 10"))
|
||||
(list 6))
|
||||
|
||||
(apl-test
|
||||
"train 2-atop: (- ⌊) 5 → -5"
|
||||
(mkrv (apl-run "(- ⌊) 5"))
|
||||
(list -5))
|
||||
|
||||
(apl-test
|
||||
"train 3-fork dyadic: 2(+×-)5 → -21"
|
||||
(mkrv (apl-run "2 (+ × -) 5"))
|
||||
(list -21))
|
||||
|
||||
(apl-test
|
||||
"train: range = (⌈/-⌊/) on vector"
|
||||
(mkrv (apl-run "(⌈/-⌊/) 3 1 4 1 5 9 2 6"))
|
||||
(list 8))
|
||||
|
||||
(apl-test
|
||||
"train: mean of ⍳10 has shape ()"
|
||||
(mksh (apl-run "(+/÷≢) ⍳10"))
|
||||
(list))
|
||||
|
||||
96
lib/apl/tests/programs-e2e.sx
Normal file
96
lib/apl/tests/programs-e2e.sx
Normal file
@@ -0,0 +1,96 @@
|
||||
; End-to-end tests of the classic-program archetypes — running APL
|
||||
; source through the full pipeline (tokenize → parse → eval-ast → runtime).
|
||||
;
|
||||
; These mirror the algorithms documented in lib/apl/tests/programs/*.apl
|
||||
; but use forms our pipeline supports today (named functions instead of
|
||||
; the inline ⍵← rebinding idiom; multi-stmt over single one-liners).
|
||||
|
||||
(define mkrv (fn (arr) (get arr :ravel)))
|
||||
(define mksh (fn (arr) (get arr :shape)))
|
||||
|
||||
; ---------- factorial via ∇ recursion (cf. n-queens style) ----------
|
||||
|
||||
(apl-test
|
||||
"e2e: factorial 5! = 120"
|
||||
(mkrv (apl-run "fact ← {0=⍵:1 ⋄ ⍵×∇⍵-1} ⋄ fact 5"))
|
||||
(list 120))
|
||||
|
||||
(apl-test
|
||||
"e2e: factorial 7! = 5040"
|
||||
(mkrv (apl-run "fact ← {0=⍵:1 ⋄ ⍵×∇⍵-1} ⋄ fact 7"))
|
||||
(list 5040))
|
||||
|
||||
(apl-test
|
||||
"e2e: factorial via ×/⍳N (no recursion)"
|
||||
(mkrv (apl-run "fact ← {×/⍳⍵} ⋄ fact 6"))
|
||||
(list 720))
|
||||
|
||||
; ---------- sum / triangular numbers (sum-1..N) ----------
|
||||
|
||||
(apl-test
|
||||
"e2e: triangular(10) = 55"
|
||||
(mkrv (apl-run "tri ← {+/⍳⍵} ⋄ tri 10"))
|
||||
(list 55))
|
||||
|
||||
(apl-test
|
||||
"e2e: triangular(100) = 5050"
|
||||
(mkrv (apl-run "tri ← {+/⍳⍵} ⋄ tri 100"))
|
||||
(list 5050))
|
||||
|
||||
; ---------- sum of squares ----------
|
||||
|
||||
(apl-test
|
||||
"e2e: sum-of-squares 1..5 = 55"
|
||||
(mkrv (apl-run "ss ← {+/⍵×⍵} ⋄ ss ⍳5"))
|
||||
(list 55))
|
||||
|
||||
(apl-test
|
||||
"e2e: sum-of-squares 1..10 = 385"
|
||||
(mkrv (apl-run "ss ← {+/⍵×⍵} ⋄ ss ⍳10"))
|
||||
(list 385))
|
||||
|
||||
; ---------- divisor-counting (prime-sieve building blocks) ----------
|
||||
|
||||
(apl-test
|
||||
"e2e: divisor counts 1..5 via outer mod"
|
||||
(mkrv (apl-run "P ← ⍳ 5 ⋄ +⌿ 0 = P ∘.| P"))
|
||||
(list 1 2 2 3 2))
|
||||
|
||||
(apl-test
|
||||
"e2e: divisor counts 1..10"
|
||||
(mkrv (apl-run "P ← ⍳ 10 ⋄ +⌿ 0 = P ∘.| P"))
|
||||
(list 1 2 2 3 2 4 2 4 3 4))
|
||||
|
||||
(apl-test
|
||||
"e2e: prime-mask 1..10 (count==2)"
|
||||
(mkrv (apl-run "P ← ⍳ 10 ⋄ 2 = +⌿ 0 = P ∘.| P"))
|
||||
(list 0 1 1 0 1 0 1 0 0 0))
|
||||
|
||||
; ---------- monadic primitives chained ----------
|
||||
|
||||
(apl-test
|
||||
"e2e: sum of |abs| = 15"
|
||||
(mkrv (apl-run "+/|¯1 ¯2 ¯3 ¯4 ¯5"))
|
||||
(list 15))
|
||||
|
||||
(apl-test
|
||||
"e2e: max of squares 1..6"
|
||||
(mkrv (apl-run "⌈/(⍳6)×⍳6"))
|
||||
(list 36))
|
||||
|
||||
; ---------- nested named functions ----------
|
||||
|
||||
(apl-test
|
||||
"e2e: compose dbl and sq via two named fns"
|
||||
(mkrv (apl-run "dbl ← {⍵+⍵} ⋄ sq ← {⍵×⍵} ⋄ sq dbl 3"))
|
||||
(list 36))
|
||||
|
||||
(apl-test
|
||||
"e2e: max-of-two as named dyadic fn"
|
||||
(mkrv (apl-run "mx ← {⍺⌈⍵} ⋄ 5 mx 3"))
|
||||
(list 5))
|
||||
|
||||
(apl-test
|
||||
"e2e: sqrt-via-newton 1 step from 1 → 2.5"
|
||||
(mkrv (apl-run "step ← {(⍵+⍺÷⍵)÷2} ⋄ 4 step 1"))
|
||||
(list 2.5))
|
||||
@@ -252,6 +252,8 @@
|
||||
|
||||
(apl-test "queens 7 → 40 solutions" (mkrv (apl-queens 7)) (list 40))
|
||||
|
||||
(apl-test "queens 8 → 92 solutions" (mkrv (apl-queens 8)) (list 92))
|
||||
|
||||
(apl-test "permutations of 3 has 6" (len (apl-permutations 3)) 6)
|
||||
|
||||
(apl-test "permutations of 4 has 24" (len (apl-permutations 4)) 24)
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
(list "+" "-" "×" "÷" "*" "⍟" "⌈" "⌊" "|" "!" "?" "○" "~" "<" "≤" "=" "≥" ">" "≠"
|
||||
"≢" "≡" "∊" "∧" "∨" "⍱" "⍲" "," "⍪" "⍴" "⌽" "⊖" "⍉" "↑" "↓" "⊂" "⊃" "⊆"
|
||||
"∪" "∩" "⍳" "⍸" "⌷" "⍋" "⍒" "⊥" "⊤" "⊣" "⊢" "⍎" "⍕"
|
||||
"⍺" "⍵" "∇" "/" "\\" "¨" "⍨" "∘" "." "⍣" "⍤" "⍥" "@" "¯"))
|
||||
"⍺" "⍵" "∇" "/" "⌿" "\\" "⍀" "¨" "⍨" "∘" "." "⍣" "⍤" "⍥" "@" "¯"))
|
||||
|
||||
(define apl-glyph?
|
||||
(fn (ch)
|
||||
@@ -138,12 +138,22 @@
|
||||
(begin
|
||||
(consume! "¯")
|
||||
(let ((digits (read-digits! "")))
|
||||
(tok-push! :num (- 0 (parse-int digits 0))))
|
||||
(if (and (< pos src-len) (= (cur-byte) ".")
|
||||
(< (+ pos 1) src-len) (apl-digit? (nth source (+ pos 1))))
|
||||
(begin (advance!)
|
||||
(let ((frac (read-digits! "")))
|
||||
(tok-push! :num (- 0 (string->number (str digits "." frac))))))
|
||||
(tok-push! :num (- 0 (parse-int digits 0)))))
|
||||
(scan!)))
|
||||
((apl-digit? ch)
|
||||
(begin
|
||||
(let ((digits (read-digits! "")))
|
||||
(tok-push! :num (parse-int digits 0)))
|
||||
(if (and (< pos src-len) (= (cur-byte) ".")
|
||||
(< (+ pos 1) src-len) (apl-digit? (nth source (+ pos 1))))
|
||||
(begin (advance!)
|
||||
(let ((frac (read-digits! "")))
|
||||
(tok-push! :num (string->number (str digits "." frac)))))
|
||||
(tok-push! :num (parse-int digits 0))))
|
||||
(scan!)))
|
||||
((= ch "'")
|
||||
(begin
|
||||
@@ -155,7 +165,9 @@
|
||||
(let ((start pos))
|
||||
(begin
|
||||
(if (cur-sw? "⎕") (consume! "⎕") (advance!))
|
||||
(read-ident-cont!)
|
||||
(if (and (< pos src-len) (cur-sw? "←"))
|
||||
(consume! "←")
|
||||
(read-ident-cont!))
|
||||
(tok-push! :name (slice source start pos))
|
||||
(scan!))))
|
||||
(true
|
||||
|
||||
@@ -40,6 +40,7 @@
|
||||
((= g "⍋") apl-grade-up)
|
||||
((= g "⍒") apl-grade-down)
|
||||
((= g "⎕FMT") apl-quad-fmt)
|
||||
((= g "⎕←") apl-quad-print)
|
||||
(else (error "no monadic fn for glyph")))))
|
||||
|
||||
(define
|
||||
@@ -97,6 +98,15 @@
|
||||
((tag (first node)))
|
||||
(cond
|
||||
((= tag :num) (apl-scalar (nth node 1)))
|
||||
((= tag :str)
|
||||
(let
|
||||
((s (nth node 1)))
|
||||
(if
|
||||
(= (len s) 1)
|
||||
(apl-scalar s)
|
||||
(make-array
|
||||
(list (len s))
|
||||
(map (fn (i) (slice s i (+ i 1))) (range 0 (len s)))))))
|
||||
((= tag :vec)
|
||||
(let
|
||||
((items (rest node)))
|
||||
@@ -139,6 +149,16 @@
|
||||
(apl-eval-ast rhs env)))))
|
||||
((= tag :program) (apl-eval-stmts (rest node) env))
|
||||
((= tag :dfn) node)
|
||||
((= tag :bracket)
|
||||
(let
|
||||
((arr-expr (nth node 1)) (axis-exprs (rest (rest node))))
|
||||
(let
|
||||
((arr (apl-eval-ast arr-expr env))
|
||||
(axes
|
||||
(map
|
||||
(fn (a) (if (= a :all) nil (apl-eval-ast a env)))
|
||||
axis-exprs)))
|
||||
(apl-bracket-multi axes arr))))
|
||||
(else (error (list "apl-eval-ast: unknown node tag" tag node)))))))
|
||||
|
||||
(define
|
||||
@@ -419,6 +439,36 @@
|
||||
((f (apl-resolve-dyadic inner env)))
|
||||
(fn (arr) (apl-commute f arr))))
|
||||
(else (error "apl-resolve-monadic: unsupported op")))))
|
||||
((= tag :fn-name)
|
||||
(let
|
||||
((nm (nth fn-node 1)))
|
||||
(let
|
||||
((bound (get env nm)))
|
||||
(if
|
||||
(and
|
||||
(list? bound)
|
||||
(> (len bound) 0)
|
||||
(= (first bound) :dfn))
|
||||
(fn (arg) (apl-call-dfn-m bound arg))
|
||||
(error "apl-resolve-monadic: name not bound to dfn")))))
|
||||
((= tag :train)
|
||||
(let
|
||||
((fns (rest fn-node)))
|
||||
(let
|
||||
((n (len fns)))
|
||||
(cond
|
||||
((= n 2)
|
||||
(let
|
||||
((g (apl-resolve-monadic (nth fns 0) env))
|
||||
(h (apl-resolve-monadic (nth fns 1) env)))
|
||||
(fn (arg) (g (h arg)))))
|
||||
((= n 3)
|
||||
(let
|
||||
((f (apl-resolve-monadic (nth fns 0) env))
|
||||
(g (apl-resolve-dyadic (nth fns 1) env))
|
||||
(h (apl-resolve-monadic (nth fns 2) env)))
|
||||
(fn (arg) (g (f arg) (h arg)))))
|
||||
(else (error "monadic train arity not 2 or 3"))))))
|
||||
(else (error "apl-resolve-monadic: unknown fn-node tag"))))))
|
||||
|
||||
(define
|
||||
@@ -442,6 +492,18 @@
|
||||
((f (apl-resolve-dyadic inner env)))
|
||||
(fn (a b) (apl-commute-dyadic f a b))))
|
||||
(else (error "apl-resolve-dyadic: unsupported op")))))
|
||||
((= tag :fn-name)
|
||||
(let
|
||||
((nm (nth fn-node 1)))
|
||||
(let
|
||||
((bound (get env nm)))
|
||||
(if
|
||||
(and
|
||||
(list? bound)
|
||||
(> (len bound) 0)
|
||||
(= (first bound) :dfn))
|
||||
(fn (a b) (apl-call-dfn bound a b))
|
||||
(error "apl-resolve-dyadic: name not bound to dfn")))))
|
||||
((= tag :outer)
|
||||
(let
|
||||
((inner (nth fn-node 2)))
|
||||
@@ -455,6 +517,24 @@
|
||||
((f (apl-resolve-dyadic f-node env))
|
||||
(g (apl-resolve-dyadic g-node env)))
|
||||
(fn (a b) (apl-inner f g a b)))))
|
||||
((= tag :train)
|
||||
(let
|
||||
((fns (rest fn-node)))
|
||||
(let
|
||||
((n (len fns)))
|
||||
(cond
|
||||
((= n 2)
|
||||
(let
|
||||
((g (apl-resolve-monadic (nth fns 0) env))
|
||||
(h (apl-resolve-dyadic (nth fns 1) env)))
|
||||
(fn (a b) (g (h a b)))))
|
||||
((= n 3)
|
||||
(let
|
||||
((f (apl-resolve-dyadic (nth fns 0) env))
|
||||
(g (apl-resolve-dyadic (nth fns 1) env))
|
||||
(h (apl-resolve-dyadic (nth fns 2) env)))
|
||||
(fn (a b) (g (f a b) (h a b)))))
|
||||
(else (error "dyadic train arity not 2 or 3"))))))
|
||||
(else (error "apl-resolve-dyadic: unknown fn-node tag"))))))
|
||||
|
||||
(define apl-run (fn (src) (apl-eval-ast (parse-apl src) {})))
|
||||
|
||||
180
lib/guest/hm.sx
Normal file
180
lib/guest/hm.sx
Normal file
@@ -0,0 +1,180 @@
|
||||
;; lib/guest/hm.sx — Hindley-Milner type-inference foundations.
|
||||
;;
|
||||
;; Builds on lib/guest/match.sx (terms + unify) and ast.sx (canonical
|
||||
;; AST shapes). This file ships the ALGEBRA — types, schemes, free
|
||||
;; type-vars, generalize / instantiate, substitution composition — so a
|
||||
;; full Algorithm W (or J) can be assembled on top either inside this
|
||||
;; file or in a host-specific consumer (haskell/infer.sx,
|
||||
;; lib/ocaml/types.sx, …).
|
||||
;;
|
||||
;; Per the brief the second consumer for this step is OCaml-on-SX
|
||||
;; Phase 5 (paired sequencing). Until that lands, the algebra is the
|
||||
;; deliverable; the host-flavoured assembly (lambda / app / let
|
||||
;; inference rules with substitution threading) lives in the host.
|
||||
;;
|
||||
;; Types
|
||||
;; -----
|
||||
;; A type is a canonical match.sx term — type variables use mk-var,
|
||||
;; type constructors use mk-ctor:
|
||||
;; (hm-tv NAME) type variable
|
||||
;; (hm-arrow A B) A -> B
|
||||
;; (hm-con NAME ARGS) named n-ary constructor
|
||||
;; (hm-int) / (hm-bool) / (hm-string) primitive constructors
|
||||
;;
|
||||
;; Schemes
|
||||
;; -------
|
||||
;; (hm-scheme VARS TYPE) ∀ VARS . TYPE
|
||||
;; (hm-monotype TYPE) empty quantifier
|
||||
;; (hm-scheme? S) (hm-scheme-vars S) (hm-scheme-type S)
|
||||
;;
|
||||
;; Free type variables
|
||||
;; -------------------
|
||||
;; (hm-ftv TYPE) names occurring in TYPE
|
||||
;; (hm-ftv-scheme S) free names (minus quantifiers)
|
||||
;; (hm-ftv-env ENV) free across an env (name -> scheme)
|
||||
;;
|
||||
;; Substitution
|
||||
;; ------------
|
||||
;; (hm-apply SUBST TYPE) substitute through a type
|
||||
;; (hm-apply-scheme SUBST S) leaves bound vars alone
|
||||
;; (hm-apply-env SUBST ENV)
|
||||
;; (hm-compose S2 S1) apply S1 then S2
|
||||
;;
|
||||
;; Generalize / Instantiate
|
||||
;; ------------------------
|
||||
;; (hm-generalize TYPE ENV) → scheme over ftv(t) - ftv(env)
|
||||
;; (hm-instantiate SCHEME COUNTER) → fresh-var instance
|
||||
;; (hm-fresh-tv COUNTER) → (:var "tN"), bumps COUNTER
|
||||
;;
|
||||
;; Inference (literal only — the rest of Algorithm W lives in the host)
|
||||
;; --------------------------------------------------------------------
|
||||
;; (hm-infer-literal EXPR) → {:subst {} :type T}
|
||||
;;
|
||||
;; A complete Algorithm W consumes this kit by assembling lambda / app
|
||||
;; / let rules in the host language file.
|
||||
|
||||
(define hm-tv (fn (name) (list :var name)))
|
||||
(define hm-con (fn (name args) (list :ctor name args)))
|
||||
(define hm-arrow (fn (a b) (hm-con "->" (list a b))))
|
||||
(define hm-int (fn () (hm-con "Int" (list))))
|
||||
(define hm-bool (fn () (hm-con "Bool" (list))))
|
||||
(define hm-string (fn () (hm-con "String" (list))))
|
||||
|
||||
(define hm-scheme (fn (vars t) (list :scheme vars t)))
|
||||
(define hm-monotype (fn (t) (hm-scheme (list) t)))
|
||||
(define hm-scheme? (fn (s) (and (list? s) (not (empty? s)) (= (first s) :scheme))))
|
||||
(define hm-scheme-vars (fn (s) (nth s 1)))
|
||||
(define hm-scheme-type (fn (s) (nth s 2)))
|
||||
|
||||
(define
|
||||
hm-fresh-tv
|
||||
(fn (counter)
|
||||
(let ((n (first counter)))
|
||||
(begin
|
||||
(set-nth! counter 0 (+ n 1))
|
||||
(hm-tv (str "t" (+ n 1)))))))
|
||||
|
||||
(define
|
||||
hm-ftv-acc
|
||||
(fn (t acc)
|
||||
(cond
|
||||
((is-var? t)
|
||||
(if (some (fn (n) (= n (var-name t))) acc) acc (cons (var-name t) acc)))
|
||||
((is-ctor? t)
|
||||
(let ((a acc))
|
||||
(begin
|
||||
(for-each (fn (x) (set! a (hm-ftv-acc x a))) (ctor-args t))
|
||||
a)))
|
||||
(:else acc))))
|
||||
|
||||
(define hm-ftv (fn (t) (hm-ftv-acc t (list))))
|
||||
|
||||
(define
|
||||
hm-ftv-scheme
|
||||
(fn (s)
|
||||
(let ((qs (hm-scheme-vars s))
|
||||
(all (hm-ftv (hm-scheme-type s))))
|
||||
(filter (fn (n) (not (some (fn (q) (= q n)) qs))) all))))
|
||||
|
||||
(define
|
||||
hm-ftv-env
|
||||
(fn (env)
|
||||
(let ((acc (list)))
|
||||
(begin
|
||||
(for-each
|
||||
(fn (k)
|
||||
(for-each
|
||||
(fn (n)
|
||||
(when (not (some (fn (m) (= m n)) acc))
|
||||
(set! acc (cons n acc))))
|
||||
(hm-ftv-scheme (get env k))))
|
||||
(keys env))
|
||||
acc))))
|
||||
|
||||
(define hm-apply (fn (subst t) (walk* t subst)))
|
||||
|
||||
(define
|
||||
hm-apply-scheme
|
||||
(fn (subst s)
|
||||
(let ((qs (hm-scheme-vars s))
|
||||
(d {}))
|
||||
(begin
|
||||
(for-each
|
||||
(fn (k)
|
||||
(when (not (some (fn (q) (= q k)) qs))
|
||||
(dict-set! d k (get subst k))))
|
||||
(keys subst))
|
||||
(hm-scheme qs (walk* (hm-scheme-type s) d))))))
|
||||
|
||||
(define
|
||||
hm-apply-env
|
||||
(fn (subst env)
|
||||
(let ((d {}))
|
||||
(begin
|
||||
(for-each
|
||||
(fn (k) (dict-set! d k (hm-apply-scheme subst (get env k))))
|
||||
(keys env))
|
||||
d))))
|
||||
|
||||
(define
|
||||
hm-compose
|
||||
(fn (s2 s1)
|
||||
(let ((d {}))
|
||||
(begin
|
||||
(for-each (fn (k) (dict-set! d k (walk* (get s1 k) s2))) (keys s1))
|
||||
(for-each
|
||||
(fn (k) (when (not (has-key? d k)) (dict-set! d k (get s2 k))))
|
||||
(keys s2))
|
||||
d))))
|
||||
|
||||
(define
|
||||
hm-generalize
|
||||
(fn (t env)
|
||||
(let ((tvars (hm-ftv t))
|
||||
(evars (hm-ftv-env env)))
|
||||
(let ((qs (filter (fn (n) (not (some (fn (m) (= m n)) evars))) tvars)))
|
||||
(hm-scheme qs t)))))
|
||||
|
||||
(define
|
||||
hm-instantiate
|
||||
(fn (s counter)
|
||||
(let ((qs (hm-scheme-vars s))
|
||||
(subst {}))
|
||||
(begin
|
||||
(for-each
|
||||
(fn (q) (set! subst (assoc subst q (hm-fresh-tv counter))))
|
||||
qs)
|
||||
(walk* (hm-scheme-type s) subst)))))
|
||||
|
||||
;; Literal inference — the only AST kind whose typing rule is closed
|
||||
;; in the kit. Lambda / app / let live in host code so the host's own
|
||||
;; AST conventions stay untouched.
|
||||
(define
|
||||
hm-infer-literal
|
||||
(fn (expr)
|
||||
(let ((v (ast-literal-value expr)))
|
||||
(cond
|
||||
((number? v) {:subst {} :type (hm-int)})
|
||||
((string? v) {:subst {} :type (hm-string)})
|
||||
((boolean? v) {:subst {} :type (hm-bool)})
|
||||
(:else (error (str "hm-infer-literal: unknown kind: " v)))))))
|
||||
145
lib/guest/layout.sx
Normal file
145
lib/guest/layout.sx
Normal file
@@ -0,0 +1,145 @@
|
||||
;; lib/guest/layout.sx — configurable off-side / layout-sensitive lexer.
|
||||
;;
|
||||
;; Inserts virtual open / close / separator tokens based on indentation.
|
||||
;; Configurable enough to encode either the Haskell 98 layout rule (let /
|
||||
;; where / do / of opens a virtual brace at the next token's column) or
|
||||
;; a Python-ish indent / dedent rule (a colon at the end of a line opens
|
||||
;; a block at the next non-blank line's column).
|
||||
;;
|
||||
;; Token shape (input + output)
|
||||
;; ----------------------------
|
||||
;; Each token is a dict {:type :value :line :col …}. The kit reads
|
||||
;; only :type / :value / :line / :col and passes everything else
|
||||
;; through. The input stream MUST be free of newline filler tokens
|
||||
;; (preprocess them away with your tokenizer) — line breaks are detected
|
||||
;; by comparing :line of consecutive tokens.
|
||||
;;
|
||||
;; Config
|
||||
;; ------
|
||||
;; :open-keywords list of strings; a token whose :value matches
|
||||
;; opens a new layout block at the next token's
|
||||
;; column (Haskell: let/where/do/of).
|
||||
;; :open-trailing-fn (fn (tok) -> bool) — alternative trigger that
|
||||
;; fires AFTER the token is emitted. Use for
|
||||
;; Python-style trailing `:`.
|
||||
;; :open-token / :close-token / :sep-token
|
||||
;; templates {:type :value} merged with :line and
|
||||
;; :col when virtual tokens are emitted.
|
||||
;; :explicit-open? (fn (tok) -> bool) — if the next token after a
|
||||
;; trigger satisfies this, suppress virtual layout
|
||||
;; for that block (Haskell: `{`).
|
||||
;; :module-prelude? if true, wrap whole input in an implicit block
|
||||
;; at the first token's column (Haskell yes,
|
||||
;; Python no).
|
||||
;;
|
||||
;; Public entry
|
||||
;; ------------
|
||||
;; (layout-pass cfg tokens) -> tokens with virtual layout inserted.
|
||||
|
||||
(define
|
||||
layout-mk-virtual
|
||||
(fn (template line col)
|
||||
(assoc (assoc template :line line) :col col)))
|
||||
|
||||
(define
|
||||
layout-is-open-kw?
|
||||
(fn (tok open-kws)
|
||||
(and (= (get tok :type) "reserved")
|
||||
(some (fn (k) (= k (get tok :value))) open-kws))))
|
||||
|
||||
(define
|
||||
layout-pass
|
||||
(fn (cfg tokens)
|
||||
(let ((open-kws (get cfg :open-keywords))
|
||||
(trailing-fn (get cfg :open-trailing-fn))
|
||||
(open-tmpl (get cfg :open-token))
|
||||
(close-tmpl (get cfg :close-token))
|
||||
(sep-tmpl (get cfg :sep-token))
|
||||
(mod-prelude? (get cfg :module-prelude?))
|
||||
(expl?-fn (get cfg :explicit-open?))
|
||||
(out (list))
|
||||
(stack (list))
|
||||
(n (len tokens))
|
||||
(i 0)
|
||||
(prev-line -1)
|
||||
(pending-open false)
|
||||
(just-opened false))
|
||||
(define
|
||||
emit-closes-while-greater
|
||||
(fn (col line)
|
||||
(when (and (not (empty? stack)) (> (first stack) col))
|
||||
(do
|
||||
(append! out (layout-mk-virtual close-tmpl line col))
|
||||
(set! stack (rest stack))
|
||||
(emit-closes-while-greater col line)))))
|
||||
(define
|
||||
emit-pending-open
|
||||
(fn (line col)
|
||||
(do
|
||||
(append! out (layout-mk-virtual open-tmpl line col))
|
||||
(set! stack (cons col stack))
|
||||
(set! pending-open false)
|
||||
(set! just-opened true))))
|
||||
(define
|
||||
layout-step
|
||||
(fn ()
|
||||
(when (< i n)
|
||||
(let ((tok (nth tokens i)))
|
||||
(let ((line (get tok :line)) (col (get tok :col)))
|
||||
(cond
|
||||
(pending-open
|
||||
(cond
|
||||
((and (not (= expl?-fn nil)) (expl?-fn tok))
|
||||
(do
|
||||
(set! pending-open false)
|
||||
(append! out tok)
|
||||
(set! prev-line line)
|
||||
(set! i (+ i 1))
|
||||
(layout-step)))
|
||||
(:else
|
||||
(do
|
||||
(emit-pending-open line col)
|
||||
(layout-step)))))
|
||||
(:else
|
||||
(let ((on-fresh-line? (and (> prev-line 0) (> line prev-line))))
|
||||
(do
|
||||
(when on-fresh-line?
|
||||
(let ((stack-before stack))
|
||||
(begin
|
||||
(emit-closes-while-greater col line)
|
||||
(when (and (not (empty? stack))
|
||||
(= (first stack) col)
|
||||
(not just-opened)
|
||||
;; suppress separator if a dedent fired
|
||||
;; — the dedent is itself the separator
|
||||
(= (len stack) (len stack-before)))
|
||||
(append! out (layout-mk-virtual sep-tmpl line col))))))
|
||||
(set! just-opened false)
|
||||
(append! out tok)
|
||||
(set! prev-line line)
|
||||
(set! i (+ i 1))
|
||||
(cond
|
||||
((layout-is-open-kw? tok open-kws)
|
||||
(set! pending-open true))
|
||||
((and (not (= trailing-fn nil)) (trailing-fn tok))
|
||||
(set! pending-open true)))
|
||||
(layout-step))))))))))
|
||||
(begin
|
||||
;; Module prelude: implicit layout block at the first token's column.
|
||||
(when (and mod-prelude? (> n 0))
|
||||
(let ((tok (nth tokens 0)))
|
||||
(do
|
||||
(append! out (layout-mk-virtual open-tmpl (get tok :line) (get tok :col)))
|
||||
(set! stack (cons (get tok :col) stack))
|
||||
(set! just-opened true))))
|
||||
(layout-step)
|
||||
;; EOF: close every remaining block.
|
||||
(define close-rest
|
||||
(fn ()
|
||||
(when (not (empty? stack))
|
||||
(do
|
||||
(append! out (layout-mk-virtual close-tmpl 0 0))
|
||||
(set! stack (rest stack))
|
||||
(close-rest)))))
|
||||
(close-rest)
|
||||
out))))
|
||||
89
lib/guest/tests/hm.sx
Normal file
89
lib/guest/tests/hm.sx
Normal file
@@ -0,0 +1,89 @@
|
||||
;; lib/guest/tests/hm.sx — exercises lib/guest/hm.sx algebra.
|
||||
|
||||
(define ghm-test-pass 0)
|
||||
(define ghm-test-fail 0)
|
||||
(define ghm-test-fails (list))
|
||||
|
||||
(define
|
||||
ghm-test
|
||||
(fn (name actual expected)
|
||||
(if (= actual expected)
|
||||
(set! ghm-test-pass (+ ghm-test-pass 1))
|
||||
(begin
|
||||
(set! ghm-test-fail (+ ghm-test-fail 1))
|
||||
(append! ghm-test-fails {:name name :expected expected :actual actual})))))
|
||||
|
||||
;; ── Type constructors ─────────────────────────────────────────────
|
||||
(ghm-test "tv" (hm-tv "a") (list :var "a"))
|
||||
(ghm-test "int" (hm-int) (list :ctor "Int" (list)))
|
||||
(ghm-test "arrow" (ctor-head (hm-arrow (hm-int) (hm-bool))) "->")
|
||||
(ghm-test "arrow-args-len" (len (ctor-args (hm-arrow (hm-int) (hm-bool)))) 2)
|
||||
|
||||
;; ── Schemes ───────────────────────────────────────────────────────
|
||||
(ghm-test "scheme-vars" (hm-scheme-vars (hm-scheme (list "a") (hm-tv "a"))) (list "a"))
|
||||
(ghm-test "monotype-vars" (hm-scheme-vars (hm-monotype (hm-int))) (list))
|
||||
(ghm-test "scheme?-yes" (hm-scheme? (hm-monotype (hm-int))) true)
|
||||
(ghm-test "scheme?-no" (hm-scheme? (hm-int)) false)
|
||||
|
||||
;; ── Fresh tyvars ──────────────────────────────────────────────────
|
||||
(ghm-test "fresh-1"
|
||||
(let ((c (list 0))) (var-name (hm-fresh-tv c))) "t1")
|
||||
(ghm-test "fresh-bumps"
|
||||
(let ((c (list 5))) (begin (hm-fresh-tv c) (first c))) 6)
|
||||
|
||||
;; ── Free type variables ──────────────────────────────────────────
|
||||
(ghm-test "ftv-int" (hm-ftv (hm-int)) (list))
|
||||
(ghm-test "ftv-tv" (hm-ftv (hm-tv "a")) (list "a"))
|
||||
(ghm-test "ftv-arrow"
|
||||
(len (hm-ftv (hm-arrow (hm-tv "a") (hm-arrow (hm-tv "b") (hm-tv "a"))))) 2)
|
||||
(ghm-test "ftv-scheme-quantified"
|
||||
(hm-ftv-scheme (hm-scheme (list "a") (hm-arrow (hm-tv "a") (hm-tv "b")))) (list "b"))
|
||||
(ghm-test "ftv-env"
|
||||
(let ((env (assoc {} "f" (hm-monotype (hm-arrow (hm-tv "x") (hm-tv "y"))))))
|
||||
(len (hm-ftv-env env))) 2)
|
||||
|
||||
;; ── Substitution / apply / compose ───────────────────────────────
|
||||
(ghm-test "apply-tv"
|
||||
(hm-apply (assoc {} "a" (hm-int)) (hm-tv "a")) (hm-int))
|
||||
(ghm-test "apply-arrow"
|
||||
(ctor-head
|
||||
(hm-apply (assoc {} "a" (hm-int))
|
||||
(hm-arrow (hm-tv "a") (hm-tv "b")))) "->")
|
||||
(ghm-test "compose-1-then-2"
|
||||
(var-name
|
||||
(hm-apply
|
||||
(hm-compose (assoc {} "b" (hm-tv "c")) (assoc {} "a" (hm-tv "b")))
|
||||
(hm-tv "a"))) "c")
|
||||
|
||||
;; ── Generalize / Instantiate ─────────────────────────────────────
|
||||
;; forall a. a -> a instantiated twice yields fresh vars each time
|
||||
(ghm-test "generalize-id"
|
||||
(len (hm-scheme-vars (hm-generalize (hm-arrow (hm-tv "a") (hm-tv "a")) {}))) 1)
|
||||
|
||||
(ghm-test "generalize-skips-env"
|
||||
;; ftv(t)={a,b}, ftv(env)={a}, qs={b}
|
||||
(let ((env (assoc {} "x" (hm-monotype (hm-tv "a")))))
|
||||
(len (hm-scheme-vars
|
||||
(hm-generalize (hm-arrow (hm-tv "a") (hm-tv "b")) env)))) 1)
|
||||
|
||||
(ghm-test "instantiate-fresh"
|
||||
(let ((s (hm-scheme (list "a") (hm-arrow (hm-tv "a") (hm-tv "a"))))
|
||||
(c (list 0)))
|
||||
(let ((t1 (hm-instantiate s c)) (t2 (hm-instantiate s c)))
|
||||
(not (= (var-name (first (ctor-args t1)))
|
||||
(var-name (first (ctor-args t2)))))))
|
||||
true)
|
||||
|
||||
;; ── Inference (literal only) ─────────────────────────────────────
|
||||
(ghm-test "infer-int"
|
||||
(ctor-head (get (hm-infer-literal (ast-literal 42)) :type)) "Int")
|
||||
(ghm-test "infer-string"
|
||||
(ctor-head (get (hm-infer-literal (ast-literal "hi")) :type)) "String")
|
||||
(ghm-test "infer-bool"
|
||||
(ctor-head (get (hm-infer-literal (ast-literal true)) :type)) "Bool")
|
||||
|
||||
(define ghm-tests-run!
|
||||
(fn ()
|
||||
{:passed ghm-test-pass
|
||||
:failed ghm-test-fail
|
||||
:total (+ ghm-test-pass ghm-test-fail)}))
|
||||
180
lib/guest/tests/layout.sx
Normal file
180
lib/guest/tests/layout.sx
Normal file
@@ -0,0 +1,180 @@
|
||||
;; lib/guest/tests/layout.sx — synthetic Python-ish off-side fixture.
|
||||
;;
|
||||
;; Exercises lib/guest/layout.sx with a config different from Haskell's
|
||||
;; (no module-prelude, layout opens via trailing `:` not via reserved
|
||||
;; keyword) to prove the kit isn't Haskell-shaped.
|
||||
|
||||
(define glayout-test-pass 0)
|
||||
(define glayout-test-fail 0)
|
||||
(define glayout-test-fails (list))
|
||||
|
||||
(define
|
||||
glayout-test
|
||||
(fn (name actual expected)
|
||||
(if (= actual expected)
|
||||
(set! glayout-test-pass (+ glayout-test-pass 1))
|
||||
(begin
|
||||
(set! glayout-test-fail (+ glayout-test-fail 1))
|
||||
(append! glayout-test-fails {:name name :expected expected :actual actual})))))
|
||||
|
||||
;; Convenience: build a token from {type value line col}.
|
||||
(define
|
||||
glayout-tok
|
||||
(fn (ty val line col)
|
||||
{:type ty :value val :line line :col col}))
|
||||
|
||||
;; Project a token list to ((type value) ...) for compact comparison.
|
||||
(define
|
||||
glayout-shape
|
||||
(fn (toks)
|
||||
(map (fn (t) (list (get t :type) (get t :value))) toks)))
|
||||
|
||||
;; ── Haskell-flavour: keyword opens block ─────────────────────────
|
||||
(define
|
||||
glayout-haskell-cfg
|
||||
{:open-keywords (list "let" "where" "do" "of")
|
||||
:open-trailing-fn nil
|
||||
:open-token {:type "vlbrace" :value "{"}
|
||||
:close-token {:type "vrbrace" :value "}"}
|
||||
:sep-token {:type "vsemi" :value ";"}
|
||||
:module-prelude? false
|
||||
:explicit-open? (fn (tok) (= (get tok :type) "lbrace"))})
|
||||
|
||||
;; do
|
||||
;; a
|
||||
;; b
|
||||
;; c ← outside the do-block
|
||||
(glayout-test "haskell-do-block"
|
||||
(glayout-shape
|
||||
(layout-pass
|
||||
glayout-haskell-cfg
|
||||
(list (glayout-tok "reserved" "do" 1 1)
|
||||
(glayout-tok "ident" "a" 2 3)
|
||||
(glayout-tok "ident" "b" 3 3)
|
||||
(glayout-tok "ident" "c" 4 1))))
|
||||
(list (list "reserved" "do")
|
||||
(list "vlbrace" "{")
|
||||
(list "ident" "a")
|
||||
(list "vsemi" ";")
|
||||
(list "ident" "b")
|
||||
(list "vrbrace" "}")
|
||||
(list "ident" "c")))
|
||||
|
||||
;; Explicit `{` after `do` suppresses virtual layout.
|
||||
(glayout-test "haskell-explicit-brace"
|
||||
(glayout-shape
|
||||
(layout-pass
|
||||
glayout-haskell-cfg
|
||||
(list (glayout-tok "reserved" "do" 1 1)
|
||||
(glayout-tok "lbrace" "{" 1 4)
|
||||
(glayout-tok "ident" "a" 1 6)
|
||||
(glayout-tok "rbrace" "}" 1 8))))
|
||||
(list (list "reserved" "do")
|
||||
(list "lbrace" "{")
|
||||
(list "ident" "a")
|
||||
(list "rbrace" "}")))
|
||||
|
||||
;; Single-statement do-block on the same line.
|
||||
(glayout-test "haskell-do-inline"
|
||||
(glayout-shape
|
||||
(layout-pass
|
||||
glayout-haskell-cfg
|
||||
(list (glayout-tok "reserved" "do" 1 1)
|
||||
(glayout-tok "ident" "a" 1 4))))
|
||||
(list (list "reserved" "do")
|
||||
(list "vlbrace" "{")
|
||||
(list "ident" "a")
|
||||
(list "vrbrace" "}")))
|
||||
|
||||
;; Module-prelude: wrap whole input in implicit layout block at first
|
||||
;; tok's column.
|
||||
(glayout-test "haskell-module-prelude"
|
||||
(glayout-shape
|
||||
(layout-pass
|
||||
(assoc glayout-haskell-cfg :module-prelude? true)
|
||||
(list (glayout-tok "ident" "x" 1 1)
|
||||
(glayout-tok "ident" "y" 2 1)
|
||||
(glayout-tok "ident" "z" 3 1))))
|
||||
(list (list "vlbrace" "{")
|
||||
(list "ident" "x")
|
||||
(list "vsemi" ";")
|
||||
(list "ident" "y")
|
||||
(list "vsemi" ";")
|
||||
(list "ident" "z")
|
||||
(list "vrbrace" "}")))
|
||||
|
||||
;; ── Python-flavour: trailing `:` opens block ─────────────────────
|
||||
(define
|
||||
glayout-python-cfg
|
||||
{:open-keywords (list)
|
||||
:open-trailing-fn (fn (tok) (and (= (get tok :type) "punct")
|
||||
(= (get tok :value) ":")))
|
||||
:open-token {:type "indent" :value "INDENT"}
|
||||
:close-token {:type "dedent" :value "DEDENT"}
|
||||
:sep-token {:type "newline" :value "NEWLINE"}
|
||||
:module-prelude? false
|
||||
:explicit-open? nil})
|
||||
|
||||
;; if x:
|
||||
;; a
|
||||
;; b
|
||||
;; c
|
||||
(glayout-test "python-if-block"
|
||||
(glayout-shape
|
||||
(layout-pass
|
||||
glayout-python-cfg
|
||||
(list (glayout-tok "reserved" "if" 1 1)
|
||||
(glayout-tok "ident" "x" 1 4)
|
||||
(glayout-tok "punct" ":" 1 5)
|
||||
(glayout-tok "ident" "a" 2 5)
|
||||
(glayout-tok "ident" "b" 3 5)
|
||||
(glayout-tok "ident" "c" 4 1))))
|
||||
(list (list "reserved" "if")
|
||||
(list "ident" "x")
|
||||
(list "punct" ":")
|
||||
(list "indent" "INDENT")
|
||||
(list "ident" "a")
|
||||
(list "newline" "NEWLINE")
|
||||
(list "ident" "b")
|
||||
(list "dedent" "DEDENT")
|
||||
(list "ident" "c")))
|
||||
|
||||
;; Nested Python-style blocks.
|
||||
;; def f():
|
||||
;; if x:
|
||||
;; a
|
||||
;; b
|
||||
(glayout-test "python-nested"
|
||||
(glayout-shape
|
||||
(layout-pass
|
||||
glayout-python-cfg
|
||||
(list (glayout-tok "reserved" "def" 1 1)
|
||||
(glayout-tok "ident" "f" 1 5)
|
||||
(glayout-tok "punct" "(" 1 6)
|
||||
(glayout-tok "punct" ")" 1 7)
|
||||
(glayout-tok "punct" ":" 1 8)
|
||||
(glayout-tok "reserved" "if" 2 5)
|
||||
(glayout-tok "ident" "x" 2 8)
|
||||
(glayout-tok "punct" ":" 2 9)
|
||||
(glayout-tok "ident" "a" 3 9)
|
||||
(glayout-tok "ident" "b" 4 5))))
|
||||
(list (list "reserved" "def")
|
||||
(list "ident" "f")
|
||||
(list "punct" "(")
|
||||
(list "punct" ")")
|
||||
(list "punct" ":")
|
||||
(list "indent" "INDENT")
|
||||
(list "reserved" "if")
|
||||
(list "ident" "x")
|
||||
(list "punct" ":")
|
||||
(list "indent" "INDENT")
|
||||
(list "ident" "a")
|
||||
(list "dedent" "DEDENT")
|
||||
(list "ident" "b")
|
||||
(list "dedent" "DEDENT")))
|
||||
|
||||
(define glayout-tests-run!
|
||||
(fn ()
|
||||
{:passed glayout-test-pass
|
||||
:failed glayout-test-fail
|
||||
:total (+ glayout-test-pass glayout-test-fail)}))
|
||||
418
lib/ocaml/parser.sx
Normal file
418
lib/ocaml/parser.sx
Normal file
@@ -0,0 +1,418 @@
|
||||
;; lib/ocaml/parser.sx — OCaml expression parser.
|
||||
;;
|
||||
;; Input: token list from (ocaml-tokenize src).
|
||||
;; Output: an OCaml AST. Nodes are plain lists tagged by a keyword head;
|
||||
;; keywords serialize to their string name so `(list :var "x")` is the
|
||||
;; same value as `(list "var" "x")` at runtime.
|
||||
;;
|
||||
;; Scope (this iteration — expressions only):
|
||||
;; atoms int/float/string/char, true/false, unit (), var, con, list literal
|
||||
;; application left-associative, f x y z
|
||||
;; prefix -E unary minus, not E
|
||||
;; infix standard ops via lib/guest/pratt.sx table
|
||||
;; tuple a, b, c (lower than infix, higher than let/if)
|
||||
;; parens (e)
|
||||
;; if if c then t else e (else optional → unit)
|
||||
;; fun fun x y -> body
|
||||
;; let let x = e in body (no rec)
|
||||
;; let f x y = e in body (function shorthand)
|
||||
;; let rec f x = e in body
|
||||
;;
|
||||
;; AST shapes:
|
||||
;; (:int N) (:float N) (:string S) (:char C) (:bool B) (:unit)
|
||||
;; (:var NAME) (:con NAME)
|
||||
;; (:app FN ARG) — binary, chain for multi-arg
|
||||
;; (:op OP LHS RHS) — binary infix; OP is the source string
|
||||
;; (:neg E) (:not E)
|
||||
;; (:tuple ITEMS)
|
||||
;; (:list ITEMS)
|
||||
;; (:if C T E)
|
||||
;; (:fun PARAMS BODY) — PARAMS list of strings (idents)
|
||||
;; (:let NAME PARAMS EXPR BODY)
|
||||
;; (:let-rec NAME PARAMS EXPR BODY)
|
||||
|
||||
(define ocaml-tok-type (fn (t) (if (= t nil) "eof" (get t :type))))
|
||||
|
||||
(define ocaml-tok-value (fn (t) (if (= t nil) nil (get t :value))))
|
||||
|
||||
;; Standard OCaml binary operator table.
|
||||
;; Higher precedence = tighter binding.
|
||||
;; ASSOC is :left or :right.
|
||||
(define
|
||||
ocaml-op-table
|
||||
(list
|
||||
(list "||" 2 :right)
|
||||
(list "or" 2 :right)
|
||||
(list "&&" 3 :right)
|
||||
(list "&" 3 :right)
|
||||
(list "=" 4 :left)
|
||||
(list "<" 4 :left)
|
||||
(list ">" 4 :left)
|
||||
(list "<=" 4 :left)
|
||||
(list ">=" 4 :left)
|
||||
(list "<>" 4 :left)
|
||||
(list "==" 4 :left)
|
||||
(list "!=" 4 :left)
|
||||
(list "|>" 4 :left)
|
||||
(list "@" 5 :right)
|
||||
(list "^" 5 :right)
|
||||
(list "::" 6 :right)
|
||||
(list "+" 7 :left)
|
||||
(list "-" 7 :left)
|
||||
(list "*" 8 :left)
|
||||
(list "/" 8 :left)
|
||||
(list "%" 8 :left)
|
||||
(list "mod" 8 :left)
|
||||
(list "land" 8 :left)
|
||||
(list "lor" 8 :left)
|
||||
(list "lxor" 8 :left)
|
||||
(list "**" 9 :right)
|
||||
(list "lsl" 9 :right)
|
||||
(list "lsr" 9 :right)
|
||||
(list "asr" 9 :right)))
|
||||
|
||||
(define
|
||||
ocaml-binop-prec
|
||||
(fn
|
||||
(op)
|
||||
(let
|
||||
((entry (pratt-op-lookup ocaml-op-table op)))
|
||||
(if (= entry nil) 0 (pratt-op-prec entry)))))
|
||||
|
||||
(define
|
||||
ocaml-binop-right?
|
||||
(fn
|
||||
(op)
|
||||
(let
|
||||
((entry (pratt-op-lookup ocaml-op-table op)))
|
||||
(and (not (= entry nil)) (= (pratt-op-assoc entry) :right)))))
|
||||
|
||||
;; Some OCaml binops are spelled with keyword tokens (mod / land / lor /
|
||||
;; lxor / lsl / lsr / asr / or). Recognise both shapes.
|
||||
(define
|
||||
ocaml-tok-is-binop?
|
||||
(fn
|
||||
(tok)
|
||||
(let
|
||||
((tt (ocaml-tok-type tok)) (tv (ocaml-tok-value tok)))
|
||||
(cond
|
||||
((= tt "op") (not (= (ocaml-binop-prec tv) 0)))
|
||||
((= tt "keyword") (not (= (ocaml-binop-prec tv) 0)))
|
||||
(else false)))))
|
||||
|
||||
(define
|
||||
ocaml-parse
|
||||
(fn
|
||||
(src)
|
||||
(let
|
||||
((tokens (ocaml-tokenize src)) (idx 0) (tok-len 0))
|
||||
(begin
|
||||
(set! tok-len (len tokens))
|
||||
(define peek-tok (fn () (nth tokens idx)))
|
||||
(define advance-tok! (fn () (set! idx (+ idx 1))))
|
||||
(define
|
||||
check-tok?
|
||||
(fn
|
||||
(type value)
|
||||
(let
|
||||
((t (peek-tok)))
|
||||
(and
|
||||
(= (ocaml-tok-type t) type)
|
||||
(or (= value nil) (= (ocaml-tok-value t) value))))))
|
||||
(define
|
||||
consume!
|
||||
(fn
|
||||
(type value)
|
||||
(if
|
||||
(check-tok? type value)
|
||||
(let ((t (peek-tok))) (begin (advance-tok!) t))
|
||||
(error
|
||||
(str
|
||||
"ocaml-parse: expected "
|
||||
type
|
||||
" "
|
||||
value
|
||||
" got "
|
||||
(ocaml-tok-type (peek-tok))
|
||||
" "
|
||||
(ocaml-tok-value (peek-tok)))))))
|
||||
(define at-kw? (fn (kw) (check-tok? "keyword" kw)))
|
||||
(define at-op? (fn (op) (check-tok? "op" op)))
|
||||
(define parse-expr (fn () nil))
|
||||
(define parse-tuple (fn () nil))
|
||||
(define parse-binop-rhs (fn (lhs min-prec) lhs))
|
||||
(define parse-prefix (fn () nil))
|
||||
(define parse-app (fn () nil))
|
||||
(define parse-atom (fn () nil))
|
||||
(set!
|
||||
parse-atom
|
||||
(fn
|
||||
()
|
||||
(let
|
||||
((t (peek-tok))
|
||||
(tt (ocaml-tok-type (peek-tok)))
|
||||
(tv (ocaml-tok-value (peek-tok))))
|
||||
(cond
|
||||
((= tt "number")
|
||||
(begin
|
||||
(advance-tok!)
|
||||
(if (= (round tv) tv) (list :int tv) (list :float tv))))
|
||||
((= tt "string") (begin (advance-tok!) (list :string tv)))
|
||||
((= tt "char") (begin (advance-tok!) (list :char tv)))
|
||||
((and (= tt "keyword") (= tv "true"))
|
||||
(begin (advance-tok!) (list :bool true)))
|
||||
((and (= tt "keyword") (= tv "false"))
|
||||
(begin (advance-tok!) (list :bool false)))
|
||||
((= tt "ident") (begin (advance-tok!) (list :var tv)))
|
||||
((= tt "ctor") (begin (advance-tok!) (list :con tv)))
|
||||
((and (= tt "op") (= tv "("))
|
||||
(begin
|
||||
(advance-tok!)
|
||||
(cond
|
||||
((at-op? ")") (begin (advance-tok!) (list :unit)))
|
||||
(else
|
||||
(let
|
||||
((e (parse-expr)))
|
||||
(begin (consume! "op" ")") e))))))
|
||||
((and (= tt "op") (= tv "["))
|
||||
(begin
|
||||
(advance-tok!)
|
||||
(cond
|
||||
((at-op? "]") (begin (advance-tok!) (list :list)))
|
||||
(else
|
||||
(let
|
||||
((items (list)))
|
||||
(begin
|
||||
(append! items (parse-expr))
|
||||
(define
|
||||
loop
|
||||
(fn
|
||||
()
|
||||
(when
|
||||
(at-op? ";")
|
||||
(begin
|
||||
(advance-tok!)
|
||||
(when
|
||||
(not (at-op? "]"))
|
||||
(begin
|
||||
(append! items (parse-expr))
|
||||
(loop)))))))
|
||||
(loop)
|
||||
(consume! "op" "]")
|
||||
(cons :list items)))))))
|
||||
((at-kw? "begin")
|
||||
(begin
|
||||
(advance-tok!)
|
||||
(let
|
||||
((e (parse-expr)))
|
||||
(begin (consume! "keyword" "end") e))))
|
||||
(else
|
||||
(error
|
||||
(str
|
||||
"ocaml-parse: unexpected token "
|
||||
tt
|
||||
" "
|
||||
tv
|
||||
" at idx "
|
||||
idx)))))))
|
||||
(define
|
||||
at-app-start?
|
||||
(fn
|
||||
()
|
||||
(let
|
||||
((tt (ocaml-tok-type (peek-tok)))
|
||||
(tv (ocaml-tok-value (peek-tok))))
|
||||
(cond
|
||||
((= tt "number") true)
|
||||
((= tt "string") true)
|
||||
((= tt "char") true)
|
||||
((= tt "ident") true)
|
||||
((= tt "ctor") true)
|
||||
((and (= tt "keyword") (or (= tv "true") (= tv "false") (= tv "begin")))
|
||||
true)
|
||||
((and (= tt "op") (or (= tv "(") (= tv "["))) true)
|
||||
(else false)))))
|
||||
(set!
|
||||
parse-app
|
||||
(fn
|
||||
()
|
||||
(let
|
||||
((head (parse-atom)))
|
||||
(begin
|
||||
(define
|
||||
loop
|
||||
(fn
|
||||
()
|
||||
(when
|
||||
(at-app-start?)
|
||||
(let
|
||||
((arg (parse-atom)))
|
||||
(begin (set! head (list :app head arg)) (loop))))))
|
||||
(loop)
|
||||
head))))
|
||||
(set!
|
||||
parse-prefix
|
||||
(fn
|
||||
()
|
||||
(cond
|
||||
((at-op? "-")
|
||||
(begin (advance-tok!) (list :neg (parse-prefix))))
|
||||
((at-kw? "not")
|
||||
(begin (advance-tok!) (list :not (parse-prefix))))
|
||||
(else (parse-app)))))
|
||||
(set!
|
||||
parse-binop-rhs
|
||||
(fn
|
||||
(lhs min-prec)
|
||||
(let
|
||||
((tok (peek-tok)))
|
||||
(cond
|
||||
((not (ocaml-tok-is-binop? tok)) lhs)
|
||||
(else
|
||||
(let
|
||||
((op (ocaml-tok-value tok))
|
||||
(prec (ocaml-binop-prec (ocaml-tok-value tok))))
|
||||
(cond
|
||||
((< prec min-prec) lhs)
|
||||
(else
|
||||
(begin
|
||||
(advance-tok!)
|
||||
(let
|
||||
((rhs (parse-prefix))
|
||||
(next-min
|
||||
(if
|
||||
(ocaml-binop-right? op)
|
||||
prec
|
||||
(+ prec 1))))
|
||||
(begin
|
||||
(set! rhs (parse-binop-rhs rhs next-min))
|
||||
(parse-binop-rhs (list :op op lhs rhs) min-prec))))))))))))
|
||||
(define
|
||||
parse-binary
|
||||
(fn
|
||||
()
|
||||
(let ((lhs (parse-prefix))) (parse-binop-rhs lhs 1))))
|
||||
(set!
|
||||
parse-tuple
|
||||
(fn
|
||||
()
|
||||
(let
|
||||
((first (parse-binary)))
|
||||
(cond
|
||||
((at-op? ",")
|
||||
(let
|
||||
((items (list first)))
|
||||
(begin
|
||||
(define
|
||||
loop
|
||||
(fn
|
||||
()
|
||||
(when
|
||||
(at-op? ",")
|
||||
(begin
|
||||
(advance-tok!)
|
||||
(append! items (parse-binary))
|
||||
(loop)))))
|
||||
(loop)
|
||||
(cons :tuple items))))
|
||||
(else first)))))
|
||||
(define
|
||||
parse-fun
|
||||
(fn
|
||||
()
|
||||
(let
|
||||
((params (list)))
|
||||
(begin
|
||||
(define
|
||||
collect-params
|
||||
(fn
|
||||
()
|
||||
(when
|
||||
(check-tok? "ident" nil)
|
||||
(begin
|
||||
(append! params (ocaml-tok-value (peek-tok)))
|
||||
(advance-tok!)
|
||||
(collect-params)))))
|
||||
(collect-params)
|
||||
(when
|
||||
(= (len params) 0)
|
||||
(error "ocaml-parse: fun expects at least one parameter"))
|
||||
(consume! "op" "->")
|
||||
(let ((body (parse-expr))) (list :fun params body))))))
|
||||
(define
|
||||
parse-let
|
||||
(fn
|
||||
()
|
||||
(let
|
||||
((reccy false))
|
||||
(begin
|
||||
(when
|
||||
(at-kw? "rec")
|
||||
(begin (advance-tok!) (set! reccy true)))
|
||||
(let
|
||||
((name (ocaml-tok-value (consume! "ident" nil)))
|
||||
(params (list)))
|
||||
(begin
|
||||
(define
|
||||
collect-params
|
||||
(fn
|
||||
()
|
||||
(when
|
||||
(check-tok? "ident" nil)
|
||||
(begin
|
||||
(append! params (ocaml-tok-value (peek-tok)))
|
||||
(advance-tok!)
|
||||
(collect-params)))))
|
||||
(collect-params)
|
||||
(consume! "op" "=")
|
||||
(let
|
||||
((rhs (parse-expr)))
|
||||
(begin
|
||||
(consume! "keyword" "in")
|
||||
(let
|
||||
((body (parse-expr)))
|
||||
(if
|
||||
reccy
|
||||
(list :let-rec name params rhs body)
|
||||
(list :let name params rhs body)))))))))))
|
||||
(define
|
||||
parse-if
|
||||
(fn
|
||||
()
|
||||
(let
|
||||
((cond-expr (parse-expr)))
|
||||
(begin
|
||||
(consume! "keyword" "then")
|
||||
(let
|
||||
((then-expr (parse-expr)))
|
||||
(cond
|
||||
((at-kw? "else")
|
||||
(begin
|
||||
(advance-tok!)
|
||||
(let
|
||||
((else-expr (parse-expr)))
|
||||
(list :if cond-expr then-expr else-expr))))
|
||||
(else (list :if cond-expr then-expr (list :unit)))))))))
|
||||
(set!
|
||||
parse-expr
|
||||
(fn
|
||||
()
|
||||
(cond
|
||||
((at-kw? "fun") (begin (advance-tok!) (parse-fun)))
|
||||
((at-kw? "let") (begin (advance-tok!) (parse-let)))
|
||||
((at-kw? "if") (begin (advance-tok!) (parse-if)))
|
||||
(else (parse-tuple)))))
|
||||
(let
|
||||
((result (parse-expr)))
|
||||
(begin
|
||||
(when
|
||||
(not (= (ocaml-tok-type (peek-tok)) "eof"))
|
||||
(error
|
||||
(str
|
||||
"ocaml-parse: trailing tokens at idx "
|
||||
idx
|
||||
" — got "
|
||||
(ocaml-tok-type (peek-tok))
|
||||
" "
|
||||
(ocaml-tok-value (peek-tok)))))
|
||||
result))))))
|
||||
426
lib/ocaml/test.sh
Executable file
426
lib/ocaml/test.sh
Executable file
@@ -0,0 +1,426 @@
|
||||
#!/usr/bin/env bash
|
||||
# Fast OCaml-on-SX test runner — epoch protocol direct to sx_server.exe.
|
||||
# Mirrors lib/lua/test.sh.
|
||||
#
|
||||
# Usage:
|
||||
# bash lib/ocaml/test.sh # run all tests
|
||||
# bash lib/ocaml/test.sh -v # verbose
|
||||
|
||||
set -uo pipefail
|
||||
cd "$(git rev-parse --show-toplevel)"
|
||||
|
||||
SX_SERVER="${SX_SERVER:-hosts/ocaml/_build/default/bin/sx_server.exe}"
|
||||
if [ ! -x "$SX_SERVER" ]; then
|
||||
SX_SERVER="/root/rose-ash/hosts/ocaml/_build/default/bin/sx_server.exe"
|
||||
fi
|
||||
if [ ! -x "$SX_SERVER" ]; then
|
||||
echo "ERROR: sx_server.exe not found. Run: cd hosts/ocaml && dune build"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
VERBOSE="${1:-}"
|
||||
PASS=0
|
||||
FAIL=0
|
||||
ERRORS=""
|
||||
TMPFILE=$(mktemp)
|
||||
trap "rm -f $TMPFILE" EXIT
|
||||
|
||||
cat > "$TMPFILE" << 'EPOCHS'
|
||||
(epoch 1)
|
||||
(load "lib/guest/lex.sx")
|
||||
(load "lib/guest/prefix.sx")
|
||||
(load "lib/guest/pratt.sx")
|
||||
(load "lib/ocaml/tokenizer.sx")
|
||||
(load "lib/ocaml/parser.sx")
|
||||
(load "lib/ocaml/tests/tokenize.sx")
|
||||
|
||||
;; ── empty / eof ────────────────────────────────────────────────
|
||||
(epoch 100)
|
||||
(eval "(ocaml-test-tok-count \"\")")
|
||||
(epoch 101)
|
||||
(eval "(ocaml-test-tok-type \"\" 0)")
|
||||
|
||||
;; ── numbers ────────────────────────────────────────────────────
|
||||
(epoch 110)
|
||||
(eval "(ocaml-test-tok-type \"42\" 0)")
|
||||
(epoch 111)
|
||||
(eval "(ocaml-test-tok-value \"42\" 0)")
|
||||
(epoch 112)
|
||||
(eval "(ocaml-test-tok-value \"3.14\" 0)")
|
||||
(epoch 113)
|
||||
(eval "(ocaml-test-tok-value \"0xff\" 0)")
|
||||
(epoch 114)
|
||||
(eval "(ocaml-test-tok-value \"1e3\" 0)")
|
||||
(epoch 115)
|
||||
(eval "(ocaml-test-tok-value \"1_000_000\" 0)")
|
||||
(epoch 116)
|
||||
(eval "(ocaml-test-tok-value \"3.14e-2\" 0)")
|
||||
|
||||
;; ── identifiers / constructors / keywords ─────────────────────
|
||||
(epoch 120)
|
||||
(eval "(ocaml-test-tok-type \"foo\" 0)")
|
||||
(epoch 121)
|
||||
(eval "(ocaml-test-tok-value \"foo_bar1\" 0)")
|
||||
(epoch 122)
|
||||
(eval "(ocaml-test-tok-type \"Some\" 0)")
|
||||
(epoch 123)
|
||||
(eval "(ocaml-test-tok-value \"Some\" 0)")
|
||||
(epoch 124)
|
||||
(eval "(ocaml-test-tok-type \"let\" 0)")
|
||||
(epoch 125)
|
||||
(eval "(ocaml-test-tok-value \"match\" 0)")
|
||||
(epoch 126)
|
||||
(eval "(ocaml-test-tok-type \"true\" 0)")
|
||||
(epoch 127)
|
||||
(eval "(ocaml-test-tok-value \"false\" 0)")
|
||||
(epoch 128)
|
||||
(eval "(ocaml-test-tok-value \"name'\" 0)")
|
||||
|
||||
;; ── strings ────────────────────────────────────────────────────
|
||||
(epoch 130)
|
||||
(eval "(ocaml-test-tok-type \"\\\"hi\\\"\" 0)")
|
||||
(epoch 131)
|
||||
(eval "(ocaml-test-tok-value \"\\\"hi\\\"\" 0)")
|
||||
(epoch 132)
|
||||
(eval "(ocaml-test-tok-value \"\\\"a\\\\nb\\\"\" 0)")
|
||||
|
||||
;; ── chars ──────────────────────────────────────────────────────
|
||||
(epoch 140)
|
||||
(eval "(ocaml-test-tok-type \"'a'\" 0)")
|
||||
(epoch 141)
|
||||
(eval "(ocaml-test-tok-value \"'a'\" 0)")
|
||||
(epoch 142)
|
||||
(eval "(ocaml-test-tok-value \"'\\\\n'\" 0)")
|
||||
|
||||
;; ── type variables ─────────────────────────────────────────────
|
||||
(epoch 145)
|
||||
(eval "(ocaml-test-tok-type \"'a\" 0)")
|
||||
(epoch 146)
|
||||
(eval "(ocaml-test-tok-value \"'a\" 0)")
|
||||
|
||||
;; ── multi-char operators ───────────────────────────────────────
|
||||
(epoch 150)
|
||||
(eval "(ocaml-test-tok-value \"->\" 0)")
|
||||
(epoch 151)
|
||||
(eval "(ocaml-test-tok-value \"|>\" 0)")
|
||||
(epoch 152)
|
||||
(eval "(ocaml-test-tok-value \"<-\" 0)")
|
||||
(epoch 153)
|
||||
(eval "(ocaml-test-tok-value \":=\" 0)")
|
||||
(epoch 154)
|
||||
(eval "(ocaml-test-tok-value \"::\" 0)")
|
||||
(epoch 155)
|
||||
(eval "(ocaml-test-tok-value \";;\" 0)")
|
||||
(epoch 156)
|
||||
(eval "(ocaml-test-tok-value \"@@\" 0)")
|
||||
(epoch 157)
|
||||
(eval "(ocaml-test-tok-value \"<>\" 0)")
|
||||
(epoch 158)
|
||||
(eval "(ocaml-test-tok-value \"&&\" 0)")
|
||||
(epoch 159)
|
||||
(eval "(ocaml-test-tok-value \"||\" 0)")
|
||||
|
||||
;; ── single-char punctuation ────────────────────────────────────
|
||||
(epoch 160)
|
||||
(eval "(ocaml-test-tok-value \"+\" 0)")
|
||||
(epoch 161)
|
||||
(eval "(ocaml-test-tok-value \"|\" 0)")
|
||||
(epoch 162)
|
||||
(eval "(ocaml-test-tok-value \";\" 0)")
|
||||
(epoch 163)
|
||||
(eval "(ocaml-test-tok-value \"(\" 0)")
|
||||
(epoch 164)
|
||||
(eval "(ocaml-test-tok-value \"!\" 0)")
|
||||
(epoch 165)
|
||||
(eval "(ocaml-test-tok-value \"@\" 0)")
|
||||
|
||||
;; ── comments ───────────────────────────────────────────────────
|
||||
(epoch 170)
|
||||
(eval "(ocaml-test-tok-count \"(* hi *)\")")
|
||||
(epoch 171)
|
||||
(eval "(ocaml-test-tok-value \"(* c *) 42\" 0)")
|
||||
(epoch 172)
|
||||
(eval "(ocaml-test-tok-count \"(* outer (* inner *) end *) 1\")")
|
||||
(epoch 173)
|
||||
(eval "(ocaml-test-tok-value \"(* outer (* inner *) end *) 1\" 0)")
|
||||
|
||||
;; ── compound expressions ───────────────────────────────────────
|
||||
(epoch 180)
|
||||
(eval "(ocaml-test-tok-count \"let x = 1\")")
|
||||
(epoch 181)
|
||||
(eval "(ocaml-test-tok-type \"let x = 1\" 0)")
|
||||
(epoch 182)
|
||||
(eval "(ocaml-test-tok-value \"let x = 1\" 0)")
|
||||
(epoch 183)
|
||||
(eval "(ocaml-test-tok-type \"let x = 1\" 1)")
|
||||
(epoch 184)
|
||||
(eval "(ocaml-test-tok-value \"let x = 1\" 2)")
|
||||
(epoch 185)
|
||||
(eval "(ocaml-test-tok-value \"let x = 1\" 3)")
|
||||
|
||||
(epoch 190)
|
||||
(eval "(ocaml-test-tok-count \"match x with | None -> 0 | Some y -> y\")")
|
||||
(epoch 191)
|
||||
(eval "(ocaml-test-tok-value \"fun x -> x + 1\" 2)")
|
||||
(epoch 192)
|
||||
(eval "(ocaml-test-tok-type \"fun x -> x + 1\" 2)")
|
||||
(epoch 193)
|
||||
(eval "(ocaml-test-tok-type \"Some 42\" 0)")
|
||||
(epoch 194)
|
||||
(eval "(ocaml-test-tok-value \"a |> f |> g\" 1)")
|
||||
(epoch 195)
|
||||
(eval "(ocaml-test-tok-value \"x := !y\" 1)")
|
||||
|
||||
;; ── Phase 1.parse: parser ──────────────────────────────────────
|
||||
;; Atoms
|
||||
(epoch 200)
|
||||
(eval "(ocaml-parse \"42\")")
|
||||
(epoch 201)
|
||||
(eval "(ocaml-parse \"3.14\")")
|
||||
(epoch 202)
|
||||
(eval "(ocaml-parse \"\\\"hi\\\"\")")
|
||||
(epoch 203)
|
||||
(eval "(ocaml-parse \"'a'\")")
|
||||
(epoch 204)
|
||||
(eval "(ocaml-parse \"true\")")
|
||||
(epoch 205)
|
||||
(eval "(ocaml-parse \"false\")")
|
||||
(epoch 206)
|
||||
(eval "(ocaml-parse \"x\")")
|
||||
(epoch 207)
|
||||
(eval "(ocaml-parse \"Some\")")
|
||||
(epoch 208)
|
||||
(eval "(ocaml-parse \"()\")")
|
||||
|
||||
;; Application (left-assoc)
|
||||
(epoch 210)
|
||||
(eval "(ocaml-parse \"f x\")")
|
||||
(epoch 211)
|
||||
(eval "(ocaml-parse \"f x y\")")
|
||||
(epoch 212)
|
||||
(eval "(ocaml-parse \"f (g x)\")")
|
||||
(epoch 213)
|
||||
(eval "(ocaml-parse \"Some 42\")")
|
||||
|
||||
;; Binops with precedence
|
||||
(epoch 220)
|
||||
(eval "(ocaml-parse \"1 + 2\")")
|
||||
(epoch 221)
|
||||
(eval "(ocaml-parse \"a + b * c\")")
|
||||
(epoch 222)
|
||||
(eval "(ocaml-parse \"a * b + c\")")
|
||||
(epoch 223)
|
||||
(eval "(ocaml-parse \"a && b || c\")")
|
||||
(epoch 224)
|
||||
(eval "(ocaml-parse \"a = b\")")
|
||||
(epoch 225)
|
||||
(eval "(ocaml-parse \"a ^ b ^ c\")")
|
||||
(epoch 226)
|
||||
(eval "(ocaml-parse \"a :: b :: []\")")
|
||||
(epoch 227)
|
||||
(eval "(ocaml-parse \"(a + b) * c\")")
|
||||
(epoch 228)
|
||||
(eval "(ocaml-parse \"a |> f |> g\")")
|
||||
(epoch 229)
|
||||
(eval "(ocaml-parse \"x mod 2\")")
|
||||
|
||||
;; Prefix
|
||||
(epoch 230)
|
||||
(eval "(ocaml-parse \"-x\")")
|
||||
(epoch 231)
|
||||
(eval "(ocaml-parse \"-1 + 2\")")
|
||||
|
||||
;; Tuples & lists
|
||||
(epoch 240)
|
||||
(eval "(ocaml-parse \"(1, 2, 3)\")")
|
||||
(epoch 241)
|
||||
(eval "(ocaml-parse \"[1; 2; 3]\")")
|
||||
(epoch 242)
|
||||
(eval "(ocaml-parse \"[]\")")
|
||||
|
||||
;; if / fun / let / let rec
|
||||
(epoch 250)
|
||||
(eval "(ocaml-parse \"if x then 1 else 2\")")
|
||||
(epoch 251)
|
||||
(eval "(ocaml-parse \"if c then x\")")
|
||||
(epoch 252)
|
||||
(eval "(ocaml-parse \"fun x -> x + 1\")")
|
||||
(epoch 253)
|
||||
(eval "(ocaml-parse \"fun x y -> x + y\")")
|
||||
(epoch 254)
|
||||
(eval "(ocaml-parse \"let x = 1 in x\")")
|
||||
(epoch 255)
|
||||
(eval "(ocaml-parse \"let f x = x + 1 in f 2\")")
|
||||
(epoch 256)
|
||||
(eval "(ocaml-parse \"let rec f x = f x in f 1\")")
|
||||
(epoch 257)
|
||||
(eval "(ocaml-parse \"let f x y = x + y in f 1 2\")")
|
||||
|
||||
;; begin/end
|
||||
(epoch 260)
|
||||
(eval "(ocaml-parse \"begin 1 + 2 end\")")
|
||||
|
||||
EPOCHS
|
||||
|
||||
OUTPUT=$(timeout 60 "$SX_SERVER" < "$TMPFILE" 2>/dev/null)
|
||||
|
||||
check() {
|
||||
local epoch="$1" desc="$2" expected="$3"
|
||||
local actual
|
||||
actual=$(echo "$OUTPUT" | grep -A1 "^(ok-len $epoch " | tail -1)
|
||||
if [ -z "$actual" ]; then
|
||||
actual=$(echo "$OUTPUT" | grep "^(ok $epoch " || true)
|
||||
fi
|
||||
if [ -z "$actual" ]; then
|
||||
actual=$(echo "$OUTPUT" | grep "^(error $epoch " || true)
|
||||
fi
|
||||
if [ -z "$actual" ]; then
|
||||
actual="<no output for epoch $epoch>"
|
||||
fi
|
||||
|
||||
if echo "$actual" | grep -qF -- "$expected"; then
|
||||
PASS=$((PASS + 1))
|
||||
[ "$VERBOSE" = "-v" ] && echo " ok $desc"
|
||||
else
|
||||
FAIL=$((FAIL + 1))
|
||||
ERRORS+=" FAIL $desc (epoch $epoch)
|
||||
expected: $expected
|
||||
actual: $actual
|
||||
"
|
||||
fi
|
||||
}
|
||||
|
||||
# empty / eof
|
||||
check 100 "empty tokens length" '1'
|
||||
check 101 "empty first is eof" '"eof"'
|
||||
|
||||
# numbers
|
||||
check 110 "int type" '"number"'
|
||||
check 111 "int value" '42'
|
||||
check 112 "float value" '3.14'
|
||||
check 113 "hex value" '255'
|
||||
check 114 "exponent" '1000'
|
||||
check 115 "underscored int" '1000000'
|
||||
check 116 "neg exponent" '0.0314'
|
||||
|
||||
# idents / ctors / keywords
|
||||
check 120 "ident type" '"ident"'
|
||||
check 121 "ident value" '"foo_bar1"'
|
||||
check 122 "ctor type" '"ctor"'
|
||||
check 123 "ctor value" '"Some"'
|
||||
check 124 "let keyword type" '"keyword"'
|
||||
check 125 "match keyword value" '"match"'
|
||||
check 126 "true is keyword" '"keyword"'
|
||||
check 127 "false value" '"false"'
|
||||
check 128 "primed ident" "\"name'\""
|
||||
|
||||
# strings
|
||||
check 130 "string type" '"string"'
|
||||
check 131 "string value" '"hi"'
|
||||
check 132 "escape sequence" '"a'
|
||||
|
||||
# chars
|
||||
check 140 "char type" '"char"'
|
||||
check 141 "char value" '"a"'
|
||||
check 142 "char escape" '"'
|
||||
|
||||
# tyvars
|
||||
check 145 "tyvar type" '"tyvar"'
|
||||
check 146 "tyvar value" '"a"'
|
||||
|
||||
# multi-char ops
|
||||
check 150 "->" '"->"'
|
||||
check 151 "|>" '"|>"'
|
||||
check 152 "<-" '"<-"'
|
||||
check 153 ":=" '":="'
|
||||
check 154 "::" '"::"'
|
||||
check 155 ";;" '";;"'
|
||||
check 156 "@@" '"@@"'
|
||||
check 157 "<>" '"<>"'
|
||||
check 158 "&&" '"&&"'
|
||||
check 159 "||" '"||"'
|
||||
|
||||
# single ops
|
||||
check 160 "+" '"+"'
|
||||
check 161 "|" '"|"'
|
||||
check 162 ";" '";"'
|
||||
check 163 "(" '"("'
|
||||
check 164 "!" '"!"'
|
||||
check 165 "@" '"@"'
|
||||
|
||||
# comments
|
||||
check 170 "block comment alone -> eof" '1'
|
||||
check 171 "num after block comment" '42'
|
||||
check 172 "nested comment count" '2'
|
||||
check 173 "nested comment value" '1'
|
||||
|
||||
# compound
|
||||
check 180 "let x = 1 count" '5'
|
||||
check 181 "let is keyword" '"keyword"'
|
||||
check 182 "let value" '"let"'
|
||||
check 183 "x is ident" '"ident"'
|
||||
check 184 "= value" '"="'
|
||||
check 185 "1 value" '1'
|
||||
|
||||
check 190 "match expr count" '13'
|
||||
check 191 "fun -> arrow value" '"->"'
|
||||
check 192 "fun -> arrow type" '"op"'
|
||||
check 193 "Some is ctor" '"ctor"'
|
||||
check 194 "first |> value" '"|>"'
|
||||
check 195 "ref assign :=" '":="'
|
||||
|
||||
# ── Parser tests ────────────────────────────────────────────────
|
||||
check 200 "parse int" '("int" 42)'
|
||||
check 201 "parse float" '("float" 3.14)'
|
||||
check 202 "parse string" '("string" "hi")'
|
||||
check 203 "parse char" '("char" "a")'
|
||||
check 204 "parse true" '("bool" true)'
|
||||
check 205 "parse false" '("bool" false)'
|
||||
check 206 "parse var" '("var" "x")'
|
||||
check 207 "parse ctor" '("con" "Some")'
|
||||
check 208 "parse unit" '("unit")'
|
||||
|
||||
check 210 "parse f x" '("app" ("var" "f") ("var" "x"))'
|
||||
check 211 "parse f x y left-assoc" '("app" ("app" ("var" "f") ("var" "x")) ("var" "y"))'
|
||||
check 212 "parse f (g x)" '("app" ("var" "f") ("app" ("var" "g") ("var" "x")))'
|
||||
check 213 "parse Some 42" '("app" ("con" "Some") ("int" 42))'
|
||||
|
||||
check 220 "parse 1+2" '("op" "+" ("int" 1) ("int" 2))'
|
||||
check 221 "parse a + b * c prec" '("op" "+" ("var" "a") ("op" "*"'
|
||||
check 222 "parse a*b + c prec" '("op" "+" ("op" "*"'
|
||||
check 223 "parse && / || prec" '("op" "||" ("op" "&&"'
|
||||
check 224 "parse a = b" '("op" "=" ("var" "a") ("var" "b"))'
|
||||
check 225 "parse ^ right-assoc" '("op" "^" ("var" "a") ("op" "^"'
|
||||
check 226 "parse :: right-assoc" '("op" "::" ("var" "a") ("op" "::"'
|
||||
check 227 "parse parens override" '("op" "*" ("op" "+"'
|
||||
check 228 "parse |> chain" '("op" "|>" ("op" "|>"'
|
||||
check 229 "parse mod kw-binop" '("op" "mod" ("var" "x") ("int" 2))'
|
||||
|
||||
check 230 "parse -x" '("neg" ("var" "x"))'
|
||||
check 231 "parse -1+2" '("op" "+" ("neg" ("int" 1)) ("int" 2))'
|
||||
|
||||
check 240 "parse tuple" '("tuple" ("int" 1) ("int" 2) ("int" 3))'
|
||||
check 241 "parse list literal" '("list" ("int" 1) ("int" 2) ("int" 3))'
|
||||
check 242 "parse []" '("list")'
|
||||
|
||||
check 250 "parse if/then/else" '("if" ("var" "x") ("int" 1) ("int" 2))'
|
||||
check 251 "parse if w/o else" '("if" ("var" "c") ("var" "x") ("unit"))'
|
||||
check 252 "parse fun x -> ..." '("fun" ("x") ("op" "+" ("var" "x") ("int" 1)))'
|
||||
check 253 "parse fun x y ->" '("fun" ("x" "y")'
|
||||
check 254 "parse let x = 1 in x" '("let" "x" () ("int" 1) ("var" "x"))'
|
||||
check 255 "parse let f x =" '("let" "f" ("x") ("op" "+"'
|
||||
check 256 "parse let rec f x =" '("let-rec" "f" ("x")'
|
||||
check 257 "parse let f x y =" '("let" "f" ("x" "y")'
|
||||
|
||||
check 260 "parse begin/end" '("op" "+" ("int" 1) ("int" 2))'
|
||||
|
||||
TOTAL=$((PASS + FAIL))
|
||||
if [ $FAIL -eq 0 ]; then
|
||||
echo "ok $PASS/$TOTAL OCaml-on-SX tests passed"
|
||||
else
|
||||
echo "FAIL $PASS/$TOTAL passed, $FAIL failed:"
|
||||
echo ""
|
||||
echo "$ERRORS"
|
||||
fi
|
||||
|
||||
[ $FAIL -eq 0 ]
|
||||
21
lib/ocaml/tests/tokenize.sx
Normal file
21
lib/ocaml/tests/tokenize.sx
Normal file
@@ -0,0 +1,21 @@
|
||||
;; lib/ocaml/tests/tokenize.sx — smoke-test helpers.
|
||||
;;
|
||||
;; Tests are exercised via lib/ocaml/test.sh, which drives sx_server.exe
|
||||
;; over the epoch protocol. This file provides small accessors so the
|
||||
;; bash runner can grep short diagnostic values out of one batched run.
|
||||
|
||||
(define
|
||||
ocaml-test-tok-type
|
||||
(fn (src i) (get (nth (ocaml-tokenize src) i) :type)))
|
||||
|
||||
(define
|
||||
ocaml-test-tok-value
|
||||
(fn (src i) (get (nth (ocaml-tokenize src) i) :value)))
|
||||
|
||||
(define ocaml-test-tok-count (fn (src) (len (ocaml-tokenize src))))
|
||||
|
||||
(define ocaml-test-parse-str (fn (src) (ocaml-parse src)))
|
||||
|
||||
(define
|
||||
ocaml-test-parse-head
|
||||
(fn (src) (nth (ocaml-parse src) 0)))
|
||||
382
lib/ocaml/tokenizer.sx
Normal file
382
lib/ocaml/tokenizer.sx
Normal file
@@ -0,0 +1,382 @@
|
||||
;; lib/ocaml/tokenizer.sx — OCaml lexer.
|
||||
;;
|
||||
;; Tokens: ident, ctor (uppercase ident), keyword, number, string, char, op, eof.
|
||||
;; Token shape: {:type :value :pos} via lex-make-token.
|
||||
;; OCaml is not indentation-sensitive — no layout pass.
|
||||
;; Block comments (* ... *) nest. There is no line-comment syntax.
|
||||
|
||||
(prefix-rename
|
||||
"ocaml-"
|
||||
(quote
|
||||
((make-token lex-make-token)
|
||||
(digit? lex-digit?)
|
||||
(hex-digit? lex-hex-digit?)
|
||||
(alpha? lex-alpha?)
|
||||
(alnum? lex-alnum?)
|
||||
(ident-start? lex-ident-start?)
|
||||
(ident-char? lex-ident-char?)
|
||||
(ws? lex-whitespace?))))
|
||||
|
||||
(define
|
||||
ocaml-keywords
|
||||
(list
|
||||
"and"
|
||||
"as"
|
||||
"assert"
|
||||
"begin"
|
||||
"class"
|
||||
"constraint"
|
||||
"do"
|
||||
"done"
|
||||
"downto"
|
||||
"else"
|
||||
"end"
|
||||
"exception"
|
||||
"external"
|
||||
"false"
|
||||
"for"
|
||||
"fun"
|
||||
"function"
|
||||
"functor"
|
||||
"if"
|
||||
"in"
|
||||
"include"
|
||||
"inherit"
|
||||
"initializer"
|
||||
"lazy"
|
||||
"let"
|
||||
"match"
|
||||
"method"
|
||||
"module"
|
||||
"mutable"
|
||||
"new"
|
||||
"nonrec"
|
||||
"object"
|
||||
"of"
|
||||
"open"
|
||||
"or"
|
||||
"private"
|
||||
"rec"
|
||||
"sig"
|
||||
"struct"
|
||||
"then"
|
||||
"to"
|
||||
"true"
|
||||
"try"
|
||||
"type"
|
||||
"val"
|
||||
"virtual"
|
||||
"when"
|
||||
"while"
|
||||
"with"
|
||||
"land"
|
||||
"lor"
|
||||
"lxor"
|
||||
"lsl"
|
||||
"lsr"
|
||||
"asr"
|
||||
"mod"))
|
||||
|
||||
(define ocaml-keyword? (fn (word) (contains? ocaml-keywords word)))
|
||||
|
||||
(define
|
||||
ocaml-upper?
|
||||
(fn (c) (and (not (= c nil)) (>= c "A") (<= c "Z"))))
|
||||
|
||||
(define
|
||||
ocaml-tokenize
|
||||
(fn
|
||||
(src)
|
||||
(let
|
||||
((tokens (list)) (pos 0) (src-len (len src)))
|
||||
(define
|
||||
ocaml-peek
|
||||
(fn
|
||||
(offset)
|
||||
(if (< (+ pos offset) src-len) (nth src (+ pos offset)) nil)))
|
||||
(define cur (fn () (ocaml-peek 0)))
|
||||
(define advance! (fn (n) (set! pos (+ pos n))))
|
||||
(define
|
||||
push!
|
||||
(fn
|
||||
(type value start)
|
||||
(append! tokens (ocaml-make-token type value start))))
|
||||
(define
|
||||
skip-block-comment!
|
||||
(fn
|
||||
(depth)
|
||||
(cond
|
||||
((>= pos src-len) nil)
|
||||
((and (= (cur) "*") (= (ocaml-peek 1) ")"))
|
||||
(begin
|
||||
(advance! 2)
|
||||
(when
|
||||
(> depth 1)
|
||||
(skip-block-comment! (- depth 1)))))
|
||||
((and (= (cur) "(") (= (ocaml-peek 1) "*"))
|
||||
(begin
|
||||
(advance! 2)
|
||||
(skip-block-comment! (+ depth 1))))
|
||||
(else (begin (advance! 1) (skip-block-comment! depth))))))
|
||||
(define
|
||||
skip-ws!
|
||||
(fn
|
||||
()
|
||||
(cond
|
||||
((>= pos src-len) nil)
|
||||
((ocaml-ws? (cur)) (begin (advance! 1) (skip-ws!)))
|
||||
((and (= (cur) "(") (= (ocaml-peek 1) "*"))
|
||||
(begin
|
||||
(advance! 2)
|
||||
(skip-block-comment! 1)
|
||||
(skip-ws!)))
|
||||
(else nil))))
|
||||
(define
|
||||
read-ident
|
||||
(fn
|
||||
(start)
|
||||
(begin
|
||||
(when
|
||||
(and (< pos src-len) (ocaml-ident-char? (cur)))
|
||||
(begin (advance! 1) (read-ident start)))
|
||||
(when
|
||||
(and (< pos src-len) (= (cur) "'"))
|
||||
(begin (advance! 1) (read-ident start)))
|
||||
(slice src start pos))))
|
||||
(define
|
||||
read-decimal-digits!
|
||||
(fn
|
||||
()
|
||||
(when
|
||||
(and (< pos src-len) (or (ocaml-digit? (cur)) (= (cur) "_")))
|
||||
(begin (advance! 1) (read-decimal-digits!)))))
|
||||
(define
|
||||
read-hex-digits!
|
||||
(fn
|
||||
()
|
||||
(when
|
||||
(and
|
||||
(< pos src-len)
|
||||
(or (ocaml-hex-digit? (cur)) (= (cur) "_")))
|
||||
(begin (advance! 1) (read-hex-digits!)))))
|
||||
(define
|
||||
read-exp-part!
|
||||
(fn
|
||||
()
|
||||
(when
|
||||
(and (< pos src-len) (or (= (cur) "e") (= (cur) "E")))
|
||||
(let
|
||||
((p1 (ocaml-peek 1)))
|
||||
(when
|
||||
(or
|
||||
(and (not (= p1 nil)) (ocaml-digit? p1))
|
||||
(and
|
||||
(or (= p1 "+") (= p1 "-"))
|
||||
(< (+ pos 2) src-len)
|
||||
(ocaml-digit? (ocaml-peek 2))))
|
||||
(begin
|
||||
(advance! 1)
|
||||
(when
|
||||
(and
|
||||
(< pos src-len)
|
||||
(or (= (cur) "+") (= (cur) "-")))
|
||||
(advance! 1))
|
||||
(read-decimal-digits!)))))))
|
||||
(define
|
||||
strip-underscores
|
||||
(fn
|
||||
(s)
|
||||
(let
|
||||
((out (list)) (i 0) (n (len s)))
|
||||
(begin
|
||||
(define
|
||||
loop
|
||||
(fn
|
||||
()
|
||||
(when
|
||||
(< i n)
|
||||
(begin
|
||||
(when
|
||||
(not (= (nth s i) "_"))
|
||||
(append! out (nth s i)))
|
||||
(set! i (+ i 1))
|
||||
(loop)))))
|
||||
(loop)
|
||||
(join "" out)))))
|
||||
(define
|
||||
read-number
|
||||
(fn
|
||||
(start)
|
||||
(cond
|
||||
((and (= (cur) "0") (< (+ pos 1) src-len) (or (= (ocaml-peek 1) "x") (= (ocaml-peek 1) "X")))
|
||||
(begin
|
||||
(advance! 2)
|
||||
(read-hex-digits!)
|
||||
(let
|
||||
((raw (slice src (+ start 2) pos)))
|
||||
(parse-number (str "0x" (strip-underscores raw))))))
|
||||
(else
|
||||
(begin
|
||||
(read-decimal-digits!)
|
||||
(when
|
||||
(and
|
||||
(< pos src-len)
|
||||
(= (cur) ".")
|
||||
(or
|
||||
(>= (+ pos 1) src-len)
|
||||
(not (= (ocaml-peek 1) "."))))
|
||||
(begin (advance! 1) (read-decimal-digits!)))
|
||||
(read-exp-part!)
|
||||
(parse-number (strip-underscores (slice src start pos))))))))
|
||||
(define
|
||||
read-string-literal
|
||||
(fn
|
||||
()
|
||||
(let
|
||||
((chars (list)))
|
||||
(begin
|
||||
(advance! 1)
|
||||
(define
|
||||
loop
|
||||
(fn
|
||||
()
|
||||
(cond
|
||||
((>= pos src-len) nil)
|
||||
((= (cur) "\\")
|
||||
(begin
|
||||
(advance! 1)
|
||||
(when
|
||||
(< pos src-len)
|
||||
(let
|
||||
((ch (cur)))
|
||||
(begin
|
||||
(cond
|
||||
((= ch "n") (append! chars "\n"))
|
||||
((= ch "t") (append! chars "\t"))
|
||||
((= ch "r") (append! chars "\r"))
|
||||
((= ch "b") (append! chars "\\b"))
|
||||
((= ch "\\") (append! chars "\\"))
|
||||
((= ch "'") (append! chars "'"))
|
||||
((= ch "\"") (append! chars "\""))
|
||||
((= ch " ") nil)
|
||||
(else (append! chars ch)))
|
||||
(advance! 1))))
|
||||
(loop)))
|
||||
((= (cur) "\"") (advance! 1))
|
||||
(else
|
||||
(begin
|
||||
(append! chars (cur))
|
||||
(advance! 1)
|
||||
(loop))))))
|
||||
(loop)
|
||||
(join "" chars)))))
|
||||
(define
|
||||
read-char-literal
|
||||
(fn
|
||||
()
|
||||
(begin
|
||||
(advance! 1)
|
||||
(let
|
||||
((value (cond ((= (cur) "\\") (begin (advance! 1) (let ((ch (cur))) (begin (advance! 1) (cond ((= ch "n") "\n") ((= ch "t") "\t") ((= ch "r") "\r") ((= ch "b") "\\b") ((= ch "\\") "\\") ((= ch "'") "'") ((= ch "\"") "\"") (else ch)))))) (else (let ((ch (cur))) (begin (advance! 1) ch))))))
|
||||
(begin
|
||||
(when
|
||||
(and (< pos src-len) (= (cur) "'"))
|
||||
(advance! 1))
|
||||
value)))))
|
||||
(define
|
||||
try-punct
|
||||
(fn
|
||||
(start)
|
||||
(let
|
||||
((c (cur))
|
||||
(c1 (ocaml-peek 1))
|
||||
(c2 (ocaml-peek 2)))
|
||||
(cond
|
||||
((and (= c ";") (= c1 ";"))
|
||||
(begin (advance! 2) (push! "op" ";;" start) true))
|
||||
((and (= c "-") (= c1 ">"))
|
||||
(begin (advance! 2) (push! "op" "->" start) true))
|
||||
((and (= c "<") (= c1 "-"))
|
||||
(begin (advance! 2) (push! "op" "<-" start) true))
|
||||
((and (= c ":") (= c1 "="))
|
||||
(begin (advance! 2) (push! "op" ":=" start) true))
|
||||
((and (= c ":") (= c1 ":"))
|
||||
(begin (advance! 2) (push! "op" "::" start) true))
|
||||
((and (= c "|") (= c1 "|"))
|
||||
(begin (advance! 2) (push! "op" "||" start) true))
|
||||
((and (= c "&") (= c1 "&"))
|
||||
(begin (advance! 2) (push! "op" "&&" start) true))
|
||||
((and (= c "<") (= c1 "="))
|
||||
(begin (advance! 2) (push! "op" "<=" start) true))
|
||||
((and (= c ">") (= c1 "="))
|
||||
(begin (advance! 2) (push! "op" ">=" start) true))
|
||||
((and (= c "<") (= c1 ">"))
|
||||
(begin (advance! 2) (push! "op" "<>" start) true))
|
||||
((and (= c "=") (= c1 "="))
|
||||
(begin (advance! 2) (push! "op" "==" start) true))
|
||||
((and (= c "!") (= c1 "="))
|
||||
(begin (advance! 2) (push! "op" "!=" start) true))
|
||||
((and (= c "|") (= c1 ">"))
|
||||
(begin (advance! 2) (push! "op" "|>" start) true))
|
||||
((and (= c "<") (= c1 "|"))
|
||||
(begin (advance! 2) (push! "op" "<|" start) true))
|
||||
((and (= c "@") (= c1 "@"))
|
||||
(begin (advance! 2) (push! "op" "@@" start) true))
|
||||
((and (= c "*") (= c1 "*"))
|
||||
(begin (advance! 2) (push! "op" "**" start) true))
|
||||
((or (= c "+") (= c "-") (= c "*") (= c "/") (= c "%") (= c "^") (= c "<") (= c ">") (= c "=") (= c "(") (= c ")") (= c "{") (= c "}") (= c "[") (= c "]") (= c ";") (= c ":") (= c ",") (= c ".") (= c "|") (= c "!") (= c "&") (= c "@") (= c "?") (= c "~") (= c "#"))
|
||||
(begin (advance! 1) (push! "op" c start) true))
|
||||
(else false)))))
|
||||
(define
|
||||
step
|
||||
(fn
|
||||
()
|
||||
(begin
|
||||
(skip-ws!)
|
||||
(when
|
||||
(< pos src-len)
|
||||
(let
|
||||
((start pos) (c (cur)))
|
||||
(cond
|
||||
((ocaml-ident-start? c)
|
||||
(let
|
||||
((word (read-ident start)))
|
||||
(begin
|
||||
(cond
|
||||
((ocaml-keyword? word)
|
||||
(push! "keyword" word start))
|
||||
((ocaml-upper? c) (push! "ctor" word start))
|
||||
(else (push! "ident" word start)))
|
||||
(step))))
|
||||
((ocaml-digit? c)
|
||||
(let
|
||||
((v (read-number start)))
|
||||
(begin (push! "number" v start) (step))))
|
||||
((= c "\"")
|
||||
(let
|
||||
((s (read-string-literal)))
|
||||
(begin (push! "string" s start) (step))))
|
||||
((and (= c "'") (< (+ pos 1) src-len) (or (and (= (ocaml-peek 1) "\\") (< (+ pos 3) src-len) (= (ocaml-peek 3) "'")) (and (not (= (ocaml-peek 1) "\\")) (< (+ pos 2) src-len) (= (ocaml-peek 2) "'"))))
|
||||
(let
|
||||
((v (read-char-literal)))
|
||||
(begin (push! "char" v start) (step))))
|
||||
((= c "'")
|
||||
(begin
|
||||
(advance! 1)
|
||||
(when
|
||||
(and (< pos src-len) (ocaml-ident-start? (cur)))
|
||||
(begin
|
||||
(advance! 1)
|
||||
(read-ident (+ start 1))))
|
||||
(push!
|
||||
"tyvar"
|
||||
(slice src (+ start 1) pos)
|
||||
start)
|
||||
(step)))
|
||||
((try-punct start) (step))
|
||||
(else
|
||||
(error
|
||||
(str "ocaml-tokenize: unexpected char " c " at " pos)))))))))
|
||||
(step)
|
||||
(push! "eof" nil pos)
|
||||
tokens)))
|
||||
@@ -135,6 +135,48 @@ and tightens loose ends.
|
||||
on error switches to the trap branch. Define `apl-throw` and a small
|
||||
set of error codes; use `try`/`catch` from the host.
|
||||
|
||||
### Phase 8 — fill the gaps left after end-to-end
|
||||
|
||||
Phase 7 wired the stack together; Phase 8 closes deferred items, lets real
|
||||
programs run from source, and starts pushing on performance.
|
||||
|
||||
- [x] **Quick-wins bundle** (one iteration) — three small fixes that each unblock
|
||||
real programs:
|
||||
- decimal literals: `read-digits!` consumes one trailing `.` plus more digits
|
||||
so `3.7` tokenises as one number;
|
||||
- `⎕←` (print) — tokenizer special-case: when `⎕` is followed by `←`, emit
|
||||
a single `:name "⎕←"` token (don't split on the assign glyph);
|
||||
- string values in `apl-eval-ast` — handle `:str` (parser already produces
|
||||
them) by wrapping into a vector of character codes (or rank-0 string).
|
||||
- [x] **Named function definitions** — `f ← {⍺+⍵} ⋄ 1 f 2` and `2 f 3`.
|
||||
- parser: when `:assign`'s RHS is a `:dfn`, mark it as a function binding;
|
||||
- eval-ast: `:assign` of a dfn stores the dfn in env;
|
||||
- parser: a name in fn-position whose env value is a dfn dispatches as a fn;
|
||||
- resolver: extend `apl-resolve-monadic`/`-dyadic` with a `:fn-name` case
|
||||
that calls `apl-call-dfn`/`apl-call-dfn-m`.
|
||||
- [x] **Multi-axis bracket indexing** — `A[I;J]` and `A[;J]` and `A[I;]`.
|
||||
- parser: split bracket content on `:semi` at depth 0; emit
|
||||
`(:dyad ⌷ (:vec I J) A)`;
|
||||
- runtime: extend `apl-squad` to accept a vector of indices, treating
|
||||
`nil` / empty axis as "all";
|
||||
- 5+ tests across vector and matrix.
|
||||
- [x] **`.apl` files as actual tests** — `lib/apl/tests/programs/*.apl` are
|
||||
currently documentation. Add `apl-run-file path → array` plus tests that
|
||||
load each file, execute it, and assert the expected result. Makes the
|
||||
classic-program corpus self-validating instead of two parallel impls.
|
||||
_(Embedded source-string approach: tests/programs-e2e.sx runs the same
|
||||
algorithms as the .apl docs through the full pipeline. The original
|
||||
one-liners (e.g. primes' inline `⍵←⍳⍵`) need parser features
|
||||
(compress-as-fn, inline assign) we haven't built yet — multi-stmt forms
|
||||
used instead. Slurp/read-file primitive missing in OCaml SX runtime.)_
|
||||
- [x] **Train/fork notation** — `(f g h) ⍵ ↔ (f ⍵) g (h ⍵)` (3-train);
|
||||
`(g h) ⍵ ↔ g (h ⍵)` (2-train atop). Parser: detect when a parenthesised
|
||||
subexpression is all functions and emit `(:train fns)`; resolver: build the
|
||||
derived function; tests for mean-via-train (`+/÷≢`).
|
||||
- [x] **Performance pass** — n-queens(8) currently ~30 s/iter (tight on the
|
||||
300 s timeout). Target: profile the inner loop, eliminate quadratic
|
||||
list-append, restore the `queens(8)` test.
|
||||
|
||||
## SX primitive baseline
|
||||
|
||||
Use vectors for arrays; numeric tower + rationals for numbers; ADTs for tagged data;
|
||||
@@ -149,6 +191,13 @@ data; format for string templating.
|
||||
|
||||
_Newest first._
|
||||
|
||||
- 2026-05-07: Phase 8 step 6 — perf: swapped (append acc xs) → (append xs acc) in apl-permutations to make permutation generation linear instead of quadratic; q(7) 32s→12s; q(8)=92 test restored within 300s timeout; **Phase 8 complete, all unchecked items ticked**; 497/497
|
||||
- 2026-05-07: Phase 8 step 5 — train/fork notation. Parser :lparen detects all-fn inner segments → emits :train AST; resolver covers 2-atop & 3-fork for both monadic and dyadic. `(+/÷≢) 1..5 → 3` (mean), `(- ⌊) 5 → -5` (atop), `2(+×-)5 → -21` (dyadic fork), `(⌈/-⌊/) → 8` (range); +6 tests; 496/496
|
||||
- 2026-05-07: Phase 8 step 4 — programs-e2e.sx runs classic-algorithm shapes through full pipeline (factorial via ∇, triangulars, sum-of-squares, divisor-counts, prime-mask, named-fn composition, dyadic max-of-two, Newton step); also added ⌿ + ⍀ to glyph sets (were silently skipped); +15 tests; 490/490
|
||||
- 2026-05-07: Phase 8 step 3 — multi-axis bracket A[I;J] / A[I;] / A[;J] via :bracket AST + apl-bracket-multi runtime; split-bracket-content scans :semi at depth 0; apl-cartesian builds index combinations; nil axis = "all"; scalar axis collapses; +8 tests; 475/475
|
||||
- 2026-05-07: Phase 8 step 2 — named function defs end-to-end via parser pre-scan; apl-known-fn-names + apl-collect-fn-bindings detect `name ← {...}` patterns; collect-segments-loop emits :fn-name for known names; resolver looks up env for :fn-name; supports recursion (∇ in named dfn); +7 tests including fact via ∇; 467/467
|
||||
- 2026-05-07: Phase 8 step 1 — quick-wins bundle: decimal literals (3.7, ¯2.5), ⎕← passthrough as monadic fn (single-token via tokenizer special-case), :str AST in eval-ast (single-char→scalar, multi-char→vec); +10 tests; 460/460
|
||||
- 2026-05-07: Phase 8 added — quick-wins bundle (decimals + ⎕← + strings), named functions, multi-axis bracket, .apl-files-as-tests, trains, perf
|
||||
- 2026-05-07: Phase 7 step 6 — :Trap exception machinery via R7RS guard; apl-throw raises tagged error, apl-trap-matches? checks codes (0=catch-all), :trap clause in apl-tradfn-eval-stmt wraps try-block with guard; :throw AST for testing; **Phase 7 complete, all unchecked plan items done**; +5 tests; 450/450
|
||||
- 2026-05-07: Phase 7 step 5 — idiom corpus 34→64 (+30 source-string idioms via apl-run); also fixed tokenizer + parser to recognize ≢ and ≡ glyphs (were silently skipped); 445/445
|
||||
- 2026-05-07: Phase 7 step 4 — bracket indexing `A[I]` desugared to `(:dyad ⌷ I A)` via maybe-bracket helper, wired into :name + :lparen branches of collect-segments-loop; multi-axis (A[I;J]) deferred (semicolon split); +7 tests; 415/415
|
||||
|
||||
@@ -158,8 +158,8 @@ Extract from `haskell/infer.sx`. Algorithm W or J, generalisation, instantiation
|
||||
| 4 — pratt.sx (lua + prolog) | [done] | da27958d | Extracted operator-table format + lookup only — climbing loops stay per-language because lua and prolog use opposite prec conventions. lua/parser.sx: 18-clause cond → 15-entry table. prolog/parser.sx: pl-op-find deleted, pl-op-lookup wraps pratt-op-lookup. lua 185/185, prolog 590/590 — both = baseline. |
|
||||
| 5 — ast.sx (lua + prolog) | [partial — pending real consumers] | a774cd26 | Kit + 33 self-tests shipped (10 canonical kinds, predicates, accessors). Step is "Optional" per brief; lua/prolog parsers untouched (185/185 + 590/590). Datalog-on-sx will be the natural first real consumer; lua/prolog converters can land later. |
|
||||
| 6 — match.sx (haskell + prolog) | [partial — kit shipped; ports deferred] | 863e9d93 | Pure-functional unify + match kit (canonical wire format + cfg-driven adapters) + 25 self-tests. Existing prolog/haskell engines untouched (structurally divergent — mutating-symmetric vs pure-asymmetric — would risk 746 passing tests under brief's revert-on-regression rule). Real consumer is minikraken/datalog work in flight. |
|
||||
| 7 — layout.sx (haskell + synthetic) | [in-progress] | — | — |
|
||||
| 8 — hm.sx (haskell + TBD) | [ ] | — | — |
|
||||
| 7 — layout.sx (haskell + synthetic) | [partial — haskell port deferred] | d75c61d4 | Configurable kit (haskell-style keyword-opens + python-style trailing-`:`-opens) + 6 self-tests covering both flavours. Synthetic Python-ish fixture passes; haskell/layout.sx untouched (kit not yet a drop-in for Haskell 98 Note 5 etc.; haskell still 156/156 baseline). |
|
||||
| 8 — hm.sx (haskell + TBD) | [partial — algebra shipped; assembly deferred] | ab2c40c1 | HM foundations: types/schemes/ftv/apply/compose/generalize/instantiate/fresh-tv on top of match.sx unify, plus literal inference rule. 24/24 self-tests. Algorithm W lambda/app/let assembly deferred to host code — paired sequencing per brief: lib/ocaml/types.sx (OCaml-on-SX Phase 5) + haskell/infer.sx port. Haskell still 156/156 baseline. |
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -116,20 +116,22 @@ SX CEK evaluator (both JS and OCaml hosts)
|
||||
|
||||
### Phase 1 — Tokenizer + parser
|
||||
|
||||
- [ ] **Tokenizer:** keywords (`let`, `rec`, `in`, `fun`, `function`, `match`, `with`,
|
||||
- [x] **Tokenizer:** keywords (`let`, `rec`, `in`, `fun`, `function`, `match`, `with`,
|
||||
`type`, `of`, `module`, `struct`, `end`, `functor`, `sig`, `open`, `include`,
|
||||
`if`, `then`, `else`, `begin`, `try`, `exception`, `raise`, `mutable`,
|
||||
`for`, `while`, `do`, `done`, `and`, `as`, `when`), operators (`->`, `|>`,
|
||||
`<|`, `@@`, `@`, `:=`, `!`, `::`, `**`, `:`, `;`, `;;`), identifiers (lower,
|
||||
upper/ctor, labels `~label:`, optional `?label:`), char literals `'c'`,
|
||||
string literals (escaped + heredoc `{|...|}`), int/float literals,
|
||||
line comments `(*` nested block comments `*)`.
|
||||
- [ ] **Parser:** top-level `let`/`let rec`/`type`/`module`/`exception`/`open`/`include`
|
||||
declarations; expressions: literals, identifiers, constructor application,
|
||||
lambda, application (left-assoc), binary ops with precedence table,
|
||||
`if`/`then`/`else`, `match`/`with`, `try`/`with`, `let`/`in`, `begin`/`end`,
|
||||
`fun`/`function`, tuples, list literals, record literals/updates, field access,
|
||||
sequences `;`, unit `()`.
|
||||
upper/ctor), char literals `'c'`, string literals (escaped),
|
||||
int/float literals (incl. hex, exponent, underscores), nested block
|
||||
comments `(* ... *)`. _(labels `~label:` / `?label:` and heredoc `{|...|}`
|
||||
deferred — surface tokens already work via `~`/`?` punct + `{`/`|` punct.)_
|
||||
- [~] **Parser:** expressions: literals, identifiers, constructor application,
|
||||
lambda, application (left-assoc), binary ops with precedence (29 ops via
|
||||
`lib/guest/pratt.sx`), `if`/`then`/`else`, `let`/`in`, `let rec`,
|
||||
`fun`/`->`, tuples, list literals, `begin`/`end`, unit `()`. _(Pending:
|
||||
top-level `let`/`type`/`module`/`exception`/`open`/`include` decls,
|
||||
`match`/`with`, `try`/`with`, `function`, record literals/updates,
|
||||
field access, sequences `;`.)_
|
||||
- [ ] **Patterns:** constructor, literal, variable, wildcard `_`, tuple, list cons `::`,
|
||||
list literal, record, `as`, or-pattern `P1 | P2`, `when` guard.
|
||||
- [ ] OCaml is **not** indentation-sensitive — no layout algorithm needed.
|
||||
@@ -308,7 +310,20 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means:
|
||||
|
||||
_Newest first._
|
||||
|
||||
_(awaiting phase 1)_
|
||||
- 2026-05-07 Phase 1 — `lib/ocaml/parser.sx` expression parser consuming
|
||||
`lib/guest/pratt.sx` for binop precedence (29 operators across 8 levels,
|
||||
incl. keyword-spelled binops `mod`/`land`/`lor`/`lxor`/`lsl`/`lsr`/`asr`).
|
||||
Atoms (literals + var/con/unit/list), application (left-assoc), prefix
|
||||
`-`/`not`, tuples, parens, `if`/`then`/`else`, `fun x y -> body`,
|
||||
`let`/`let rec` with function shorthand. AST shapes match Haskell-on-SX
|
||||
conventions (`(:int N)` `(:op OP L R)` `(:fun PARAMS BODY)` etc.). Total
|
||||
95/95 tests now passing via `lib/ocaml/test.sh`.
|
||||
- 2026-05-07 Phase 1 — `lib/ocaml/tokenizer.sx` consuming `lib/guest/lex.sx`
|
||||
via `prefix-rename`. Covers idents, ctors, 51 keywords, numbers (int / float
|
||||
/ hex / exponent / underscored), strings (with escapes), chars (with escapes),
|
||||
type variables (`'a`), nested block comments, and 26 operator/punct tokens
|
||||
(incl. `->` `|>` `<-` `:=` `::` `;;` `@@` `<>` `&&` `||` `**` etc.). 58/58
|
||||
tokenizer tests pass via `lib/ocaml/test.sh` driving `sx_server.exe`.
|
||||
|
||||
## Blockers
|
||||
|
||||
|
||||
Reference in New Issue
Block a user