diff --git a/lib/ocaml/parser.sx b/lib/ocaml/parser.sx
index 5201d17c..59634387 100644
--- a/lib/ocaml/parser.sx
+++ b/lib/ocaml/parser.sx
@@ -416,3 +416,190 @@
                   " "
                   (ocaml-tok-value (peek-tok)))))
           result))))))
+
+;; ocaml-parse-program: parse a whole OCaml source string into
+;;   (:program DECL ...)
+;; where each DECL is one of
+;;   (:def NAME PARAMS EXPR)      from  let NAME PARAM* = EXPR
+;;   (:def-rec NAME PARAMS EXPR)  from  let rec NAME PARAM* = EXPR
+;;   (:expr EXPR)                 from  a bare top-level expression
+;; Forms may be separated by ";;". A body is found by scanning tokens to the
+;; next boundary; that slice of src is then handed to ocaml-parse.
+;;
+;; A "let" is a boundary only when no "in" closes it before the next ";;" or
+;; eof. A "let" that IS closed by "in" is a let-in expression: it stays inside
+;; the current body, or is parsed as a bare expression when it starts a form.
+;; The look-ahead rescans tokens, so long ";;"-free inputs cost extra.
+;; NOTE(review): assumes the tokenizer emits "in" as a "keyword" token, like
+;; "let" and "rec" -- confirm against ocaml-tokenize.
+(define
+  ocaml-parse-program
+  (fn
+    (src)
+    (let
+      ((tokens (ocaml-tokenize src)) (idx 0) (tok-len 0) (decls (list)))
+      (begin
+        (set! tok-len (len tokens))
+        (define peek-tok (fn () (nth tokens idx)))
+        (define advance-tok! (fn () (set! idx (+ idx 1))))
+        ;; Does the current token have this type and (unless value is nil) value?
+        (define
+          check-tok?
+          (fn
+            (type value)
+            (let
+              ((t (peek-tok)))
+              (and
+                (= (ocaml-tok-type t) type)
+                (or (= value nil) (= (ocaml-tok-value t) value))))))
+        ;; Return the current token and step past it, or raise on a mismatch.
+        (define
+          consume!
+          (fn
+            (type value)
+            (if
+              (check-tok? type value)
+              (let ((t (peek-tok))) (begin (advance-tok!) t))
+              (error
+                (str
+                  "ocaml-parse-program: expected "
+                  type
+                  " "
+                  value
+                  " got "
+                  (ocaml-tok-type (peek-tok))
+                  " "
+                  (ocaml-tok-value (peek-tok)))))))
+        (define at-kw? (fn (kw) (check-tok? "keyword" kw)))
+        (define at-op? (fn (op) (check-tok? "op" op)))
+        (define
+          skip-double-semi!
+          (fn
+            ()
+            (when (at-op? ";;") (begin (advance-tok!) (skip-double-semi!)))))
+        ;; Source offset of the current token; end of src when there is none.
+        (define
+          cur-pos
+          (fn
+            ()
+            (let ((t (peek-tok))) (if (= t nil) (len src) (get t :pos)))))
+        ;; Is the token at index i of the given type and value?
+        (define
+          tok-is?
+          (fn
+            (i type value)
+            (let
+              ((t (nth tokens i)))
+              (and
+                (= (ocaml-tok-type t) type)
+                (= (ocaml-tok-value t) value)))))
+        ;; Scan from index i with depth unclosed "let"s. True when an "in"
+        ;; closes the outermost one before ";;", eof, or the end of tokens.
+        (define
+          let-closed-by-in?
+          (fn
+            (i depth)
+            (cond
+              ((>= i tok-len) false)
+              ((= (ocaml-tok-type (nth tokens i)) "eof") false)
+              ((tok-is? i "op" ";;") false)
+              ((tok-is? i "keyword" "let")
+                (let-closed-by-in? (+ i 1) (+ depth 1)))
+              ((tok-is? i "keyword" "in")
+                (if
+                  (= depth 1)
+                  true
+                  (let-closed-by-in? (+ i 1) (- depth 1))))
+              (else (let-closed-by-in? (+ i 1) depth)))))
+        ;; Is the current token a "let" that opens a let-in expression?
+        (define
+          at-let-in?
+          (fn () (and (at-kw? "let") (let-closed-by-in? (+ idx 1) 1))))
+        ;; Advance to the next ";;", eof, or declaration-starting "let".
+        (define
+          skip-to-boundary!
+          (fn
+            ()
+            (cond
+              ((>= idx tok-len) nil)
+              ((= (ocaml-tok-type (peek-tok)) "eof") nil)
+              ((at-op? ";;") nil)
+              ;; A let-in belongs to the body being skipped; a bare "let"
+              ;; starts the next declaration.
+              ((at-kw? "let")
+                (if
+                  (at-let-in?)
+                  (begin (advance-tok!) (skip-to-boundary!))
+                  nil))
+              (else (begin (advance-tok!) (skip-to-boundary!))))))
+        ;; Parse let [rec] NAME PARAM* = EXPR with the cursor on "let".
+        (define
+          parse-decl-let
+          (fn
+            ()
+            (advance-tok!)
+            (let
+              ((reccy false))
+              (begin
+                (when
+                  (at-kw? "rec")
+                  (begin (advance-tok!) (set! reccy true)))
+                (let
+                  ((name (ocaml-tok-value (consume! "ident" nil)))
+                    (params (list)))
+                  (begin
+                    (define
+                      collect-params
+                      (fn
+                        ()
+                        (when
+                          (check-tok? "ident" nil)
+                          (begin
+                            (append! params (ocaml-tok-value (peek-tok)))
+                            (advance-tok!)
+                            (collect-params)))))
+                    (collect-params)
+                    (consume! "op" "=")
+                    (let
+                      ((expr-start (cur-pos)))
+                      (begin
+                        (skip-to-boundary!)
+                        (let
+                          ((expr-src (slice src expr-start (cur-pos))))
+                          (let
+                            ((expr (ocaml-parse expr-src)))
+                            (if
+                              reccy
+                              (list :def-rec name params expr)
+                              (list :def name params expr))))))))))))
+        ;; Parse a bare top-level expression up to the next boundary.
+        (define
+          parse-decl-expr
+          (fn
+            ()
+            (let
+              ((expr-start (cur-pos)))
+              (begin
+                (skip-to-boundary!)
+                (let
+                  ((expr-src (slice src expr-start (cur-pos))))
+                  (let ((expr (ocaml-parse expr-src))) (list :expr expr)))))))
+        ;; Main driver: drop ";;" separators, then parse one form per pass.
+        (define
+          loop
+          (fn
+            ()
+            (begin
+              (skip-double-semi!)
+              (when
+                (< idx tok-len)
+                (cond
+                  ((= (ocaml-tok-type (peek-tok)) "eof") nil)
+                  ;; A let-in at the start of a form is an expression.
+                  ((at-let-in?)
+                    (begin (append! decls (parse-decl-expr)) (loop)))
+                  ((at-kw? "let")
+                    (begin (append! decls (parse-decl-let)) (loop)))
+                  (else (begin (append! decls (parse-decl-expr)) (loop))))))))
+        (loop)
+        (cons :program decls)))))
diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh
index 3b2fae6b..cf55ac8a 100755
--- a/lib/ocaml/test.sh
+++ b/lib/ocaml/test.sh
@@ -260,6 +260,26 @@ cat > "$TMPFILE" << 'EPOCHS'
 (epoch 260)
 (eval "(ocaml-parse \"begin 1 + 2 end\")")
 
+;; ── Top-level decls ────────────────────────────────────────────
+(epoch 270)
+(eval "(ocaml-parse-program \"let x = 1\")")
+(epoch 271)
+(eval "(ocaml-parse-program \"let x = 1 ;;\")")
+(epoch 272)
+(eval "(ocaml-parse-program \"let f x = x + 1\")")
+(epoch 273)
+(eval "(ocaml-parse-program \"let rec fact n = if n = 0 then 1 else n * fact (n - 1)\")")
+(epoch 274)
+(eval "(ocaml-parse-program \"let x = 1 let y = 2\")")
+(epoch 275)
+(eval "(ocaml-parse-program \"1 + 2 ;;\")")
+(epoch 276)
+(eval "(ocaml-parse-program \"let x = 1 ;; let y = 2 ;; x + y\")")
+(epoch 277)
+(eval "(len (ocaml-parse-program \"let x = 1 ;; let y = 2 ;; x + y\"))")
+(epoch 278)
+(eval "(ocaml-parse-program \"\")")
+
 EPOCHS
 
 OUTPUT=$(timeout 60 "$SX_SERVER" < "$TMPFILE" 2>/dev/null)
@@ -414,6 +434,17 @@ check 257 "parse let f x y =" '("let" "f" ("x" "y")'
 
 check 260 "parse begin/end" '("op" "+" ("int" 1) ("int" 2))'
 
+# ── Top-level decls ─────────────────────────────────────────────
+check 270 "program: let x = 1" '("program" ("def" "x" () ("int" 1)))'
+check 271 "program: let x = 1 ;;" '("program" ("def" "x" () ("int" 1)))' +check 272 "program: let f x = x+1" '("program" ("def" "f" ("x") ("op" "+"' +check 273 "program: let rec fact" '("def-rec" "fact" ("n")' +check 274 "program: two decls" '("def" "x" () ("int" 1)) ("def" "y"' +check 275 "program: bare expr" '("program" ("expr" ("op" "+" ("int" 1) ("int" 2))))' +check 276 "program: mixed decls + expr" '("def" "y" () ("int" 2)) ("expr"' +check 277 "program: 4 forms incl head" '4' +check 278 "program: empty" '("program")' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 54bd98f2..7c1534e0 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -128,10 +128,11 @@ SX CEK evaluator (both JS and OCaml hosts) - [~] **Parser:** expressions: literals, identifiers, constructor application, lambda, application (left-assoc), binary ops with precedence (29 ops via `lib/guest/pratt.sx`), `if`/`then`/`else`, `let`/`in`, `let rec`, - `fun`/`->`, tuples, list literals, `begin`/`end`, unit `()`. _(Pending: - top-level `let`/`type`/`module`/`exception`/`open`/`include` decls, - `match`/`with`, `try`/`with`, `function`, record literals/updates, - field access, sequences `;`.)_ + `fun`/`->`, tuples, list literals, `begin`/`end`, unit `()`. Top-level + decls: `let [rec] name params* = expr` and bare expressions, `;;`-separated + via `ocaml-parse-program`. _(Pending: `type`/`module`/`exception`/`open`/ + `include` decls, `match`/`with`, `try`/`with`, `function`, record literals/ + updates, field access, sequences `;`, `and` mutually-recursive bindings.)_ - [ ] **Patterns:** constructor, literal, variable, wildcard `_`, tuple, list cons `::`, list literal, record, `as`, or-pattern `P1 | P2`, `when` guard. - [ ] OCaml is **not** indentation-sensitive — no layout algorithm needed. @@ -310,6 +311,12 @@ the "mother tongue" closure: OCaml → SX → OCaml. 
This means: _Newest first._ +- 2026-05-07 Phase 1 — top-level program parser `ocaml-parse-program`. Parses + a sequence of `let [rec] name params* = expr` decls and bare expressions + separated by `;;`. Output `(:program DECL …)` (decls follow the head directly, not as a nested list) with each decl one of `(:def …)`, + `(:def-rec …)`, `(:expr E)`. Decl bodies parsed by re-feeding the source + slice through `ocaml-parse` (cheap stand-in until shared-state refactor). + 104/104 tests now passing (+9). - 2026-05-07 Phase 1 — `lib/ocaml/parser.sx` expression parser consuming `lib/guest/pratt.sx` for binop precedence (29 operators across 8 levels, incl. keyword-spelled binops `mod`/`land`/`lor`/`lxor`/`lsl`/`lsr`/`asr`).