From de7be332c8346819f775ff5e9ea43ad024316c63 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 13:33:24 +0000 Subject: [PATCH] ocaml: phase 5.1 baseline OCaml programs (5/5 pass) + lookahead boundary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit lib/ocaml/baseline/{factorial,list_ops,option_match,module_use,sum_squares}.ml exercised through ocaml-run-program (file-read F). lib/ocaml/baseline/ run.sh runs them and compares against expected.json — all 5 pass. To make module_use.ml (with nested let-in) parse, parser's skip-let-rhs-boundary! now uses has-matching-in? lookahead: a let at depth 0 in a let-decl rhs opens a nested block IFF a matching in exists before any decl-keyword. Without that in, the let is a new top-level decl (preserves test 274 'let x = 1 let y = 2'). This is the first piece of Phase 5.1 'vendor a slice of OCaml testsuite' — handcrafted fixtures for now, real testsuite TBD. --- lib/ocaml/baseline/expected.json | 7 +++ lib/ocaml/baseline/factorial.ml | 4 ++ lib/ocaml/baseline/list_ops.ml | 5 ++ lib/ocaml/baseline/module_use.ml | 14 +++++ lib/ocaml/baseline/option_match.ml | 8 +++ lib/ocaml/baseline/run.sh | 75 +++++++++++++++++++++++ lib/ocaml/baseline/sum_squares.ml | 6 ++ lib/ocaml/parser.sx | 96 ++++++++++++++++++++++++++---- plans/ocaml-on-sx.md | 19 ++++-- 9 files changed, 217 insertions(+), 17 deletions(-) create mode 100644 lib/ocaml/baseline/expected.json create mode 100644 lib/ocaml/baseline/factorial.ml create mode 100644 lib/ocaml/baseline/list_ops.ml create mode 100644 lib/ocaml/baseline/module_use.ml create mode 100644 lib/ocaml/baseline/option_match.ml create mode 100755 lib/ocaml/baseline/run.sh create mode 100644 lib/ocaml/baseline/sum_squares.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json new file mode 100644 index 00000000..f5c089ad --- /dev/null +++ b/lib/ocaml/baseline/expected.json @@ -0,0 +1,7 @@ +{ + "factorial.ml": 3628800, + "list_ops.ml": 30, + "option_match.ml": 5, + "module_use.ml": 3, + "sum_squares.ml": 385 +} diff --git a/lib/ocaml/baseline/factorial.ml b/lib/ocaml/baseline/factorial.ml new file mode 100644 index 00000000..e2234fff --- /dev/null +++ b/lib/ocaml/baseline/factorial.ml @@ -0,0 +1,4 @@ +(* Baseline: factorial via let-rec *) +let rec fact n = + if n = 0 then 1 else n * fact (n - 1) ;; +fact 10 diff --git a/lib/ocaml/baseline/list_ops.ml b/lib/ocaml/baseline/list_ops.ml new file mode 100644 index 00000000..fd1d23c7 --- /dev/null +++ b/lib/ocaml/baseline/list_ops.ml @@ -0,0 +1,5 @@ +(* Baseline: List functions exercise *) +let xs = [1; 2; 3; 4; 5] ;; +let doubled = List.map (fun x -> x * 2) xs ;; +let total = List.fold_left (fun a b -> a + b) 0 doubled ;; +total diff --git a/lib/ocaml/baseline/module_use.ml b/lib/ocaml/baseline/module_use.ml new file mode 100644 index 00000000..25e33331 --- /dev/null +++ b/lib/ocaml/baseline/module_use.ml @@ -0,0 +1,14 @@ +(* Baseline: module declaration + use *) +module Counter = struct + let make () = + let n = ref 0 in + fun () -> + n := !n + 1 ; + !n +end ;; +let result = + let c = Counter.make () in + let _ = c () in + let _ = c () in + c () ;; +result diff --git a/lib/ocaml/baseline/option_match.ml b/lib/ocaml/baseline/option_match.ml new file mode 100644 index 00000000..a2d0f864 --- /dev/null +++ b/lib/ocaml/baseline/option_match.ml @@ -0,0 +1,8 @@ +(* Baseline: option type + pattern matching *) +let safe_div a b = + if b = 0 then None else Some (a / b) ;; +let result = + match safe_div 20 4 with + | None -> 0 + | Some x -> x ;; +result diff --git a/lib/ocaml/baseline/run.sh b/lib/ocaml/baseline/run.sh new file mode 100755 index 00000000..516d855c --- /dev/null +++ b/lib/ocaml/baseline/run.sh @@ -0,0 +1,75 @@ +#!/usr/bin/env bash +# lib/ocaml/baseline/run.sh — run each baseline OCaml program through +# ocaml-run-program and compare to expected.json. + +set -uo pipefail +cd "$(git rev-parse --show-toplevel)" + +SX_SERVER="${SX_SERVER:-hosts/ocaml/_build/default/bin/sx_server.exe}" +if [ ! -x "$SX_SERVER" ]; then + SX_SERVER="/root/rose-ash/hosts/ocaml/_build/default/bin/sx_server.exe" +fi + +PASS=0 +FAIL=0 +ERRORS="" + +for f in lib/ocaml/baseline/*.ml; do + name=$(basename "$f") + expected=$(grep -oE "\"$name\"[[:space:]]*:[[:space:]]*[0-9-]+" lib/ocaml/baseline/expected.json | sed -E 's/.*:[[:space:]]*//') + if [ -z "$expected" ]; then + continue + fi + + TMP=$(mktemp) + cat > "$TMP" << EPOCHS +(epoch 1) +(load "lib/guest/lex.sx") +(load "lib/guest/prefix.sx") +(load "lib/guest/pratt.sx") +(load "lib/ocaml/tokenizer.sx") +(load "lib/ocaml/parser.sx") +(load "lib/ocaml/eval.sx") +(load "lib/ocaml/runtime.sx") +(eval "(ocaml-load-stdlib!)") +(epoch 2) +(eval "(ocaml-run-program (file-read \\"$f\\"))") +EPOCHS + + output=$(timeout 60 "$SX_SERVER" < "$TMP" 2>/dev/null | grep -E '^\(ok-len 2|^\(ok 2' | head -1) + rm -f "$TMP" + + # Pull the next line which has the value + result=$(timeout 60 "$SX_SERVER" < <(cat </dev/null | awk '/^\(ok-len 2 / {getline; print; exit} /^\(ok 2 / {sub(/^\(ok 2 /, ""); sub(/\)$/, ""); print; exit}') + + if [ "$result" = "$expected" ]; then + PASS=$((PASS + 1)) + echo " ok $name → $result" + else + FAIL=$((FAIL + 1)) + ERRORS+=" FAIL $name expected=$expected got=$result +" + fi +done + +TOTAL=$((PASS + FAIL)) +if [ $FAIL -eq 0 ]; then + echo "ok $PASS/$TOTAL baseline OCaml programs run correctly" +else + echo "FAIL $PASS/$TOTAL baseline programs" + echo "$ERRORS" +fi +[ $FAIL -eq 0 ] diff --git a/lib/ocaml/baseline/sum_squares.ml b/lib/ocaml/baseline/sum_squares.ml new file mode 100644 index 00000000..a1a17778 --- /dev/null +++ b/lib/ocaml/baseline/sum_squares.ml @@ -0,0 +1,6 @@ +(* Baseline: imperative loop summing squares *) +let total = ref 0 ;; +for i = 1 to 10 do + total := !total + i * i +done ;; +!total diff --git a/lib/ocaml/parser.sx b/lib/ocaml/parser.sx index b85dba56..63e691a8 100644 --- a/lib/ocaml/parser.sx +++ b/lib/ocaml/parser.sx @@ -895,12 +895,88 @@ (fn () (let ((t (peek-tok))) (if (= t nil) (len src) (get t :pos))))) - ;; skip-to-boundary! advances `idx` to the next top-level decl - ;; boundary, tracking `let`/`begin`/`struct` etc. nesting so that - ;; an inner `let ... in ...` doesn't terminate a top-level decl - ;; body. Boundary tokens (when at depth 0): - ;; ;; let module open include and type exception - ;; Boundary at any depth: eof. + ;; Two flavors of boundary skipping: + ;; + ;; * `skip-to-decl-boundary!` — used by parse-decl-expr. Stops + ;; at the start of the next top-level decl: ;;, let, module, + ;; open, include, and, type, exception, or eof. + ;; + ;; * `skip-let-rhs-boundary!` — used inside parse-decl-let after + ;; the `=`. Treats `let` as the opener of a nested let..in + ;; block (NOT a decl boundary), so `let f x = let y = 0 in y` + ;; parses correctly. Boundary tokens (depth 0): ;;, module, + ;; open, include, and, type, exception, or eof. + ;; Lookahead: starting just past a `let` at the cursor, scan + ;; for a matching `in` before the next decl boundary. Returns + ;; true iff such an `in` exists — meaning the let is nested, + ;; not a new decl. + (define has-matching-in? + (fn () + (let ((p (+ idx 1)) (d 1) (result false) (done false)) + (begin + (define scan + (fn () + (when (not done) + (cond + ((>= p tok-len) (set! done true)) + (else + (let ((t (nth tokens p))) + (let ((tt (ocaml-tok-type t)) (tv (ocaml-tok-value t))) + (cond + ((= tt "eof") (set! done true)) + ((and (= tt "op") (= tv ";;")) (set! done true)) + ((and (= tt "keyword") (= tv "module")) (set! done true)) + ((and (= tt "keyword") (= tv "type")) (set! done true)) + ((and (= tt "keyword") (= tv "exception")) (set! done true)) + ((and (= tt "keyword") (= tv "open")) (set! done true)) + ((and (= tt "keyword") (= tv "include")) (set! done true)) + ((and (= tt "keyword") (= tv "and")) (set! done true)) + ((and (= tt "keyword") (= tv "let")) + (begin (set! d (+ d 1)) (set! p (+ p 1)) (scan))) + ((and (= tt "keyword") (= tv "in")) + (cond + ((= d 1) (begin (set! result true) (set! done true))) + (else + (begin (set! d (- d 1)) (set! p (+ p 1)) (scan))))) + (else (begin (set! p (+ p 1)) (scan))))))))))) + (scan) + result)))) + + ;; Same as skip-to-boundary but treats inner `let` as the start + ;; of a nested let..in (open depth) IF a matching `in` exists + ;; before any decl boundary; otherwise stops. + (define + skip-let-rhs-boundary! + (fn () + (let ((depth 0)) + (begin + (define step + (fn () + (cond + ((>= idx tok-len) nil) + ((= (ocaml-tok-type (peek-tok)) "eof") nil) + ((and (= depth 0) (at-op? ";;")) nil) + ((and (= depth 0) (at-kw? "module")) nil) + ((and (= depth 0) (at-kw? "open")) nil) + ((and (= depth 0) (at-kw? "include")) nil) + ((and (= depth 0) (at-kw? "and")) nil) + ((and (= depth 0) (at-kw? "type")) nil) + ((and (= depth 0) (at-kw? "exception")) nil) + ((and (= depth 0) (at-kw? "let")) + (cond + ((has-matching-in?) + (begin (set! depth (+ depth 1)) (advance-tok!) (step))) + (else nil))) + ((or (at-kw? "let") (at-kw? "begin") (at-kw? "struct") + (at-kw? "sig") (at-kw? "for") (at-kw? "while")) + (begin (set! depth (+ depth 1)) (advance-tok!) (step))) + ((or (at-kw? "in") (at-kw? "end") (at-kw? "done")) + (begin + (when (> depth 0) (set! depth (- depth 1))) + (advance-tok!) (step))) + (else (begin (advance-tok!) (step)))))) + (step))))) + (define skip-to-boundary! (fn () @@ -919,12 +995,6 @@ ((and (= depth 0) (at-kw? "and")) nil) ((and (= depth 0) (at-kw? "type")) nil) ((and (= depth 0) (at-kw? "exception")) nil) - ;; Track nested blocks that have explicit closing - ;; tokens. let..in / begin..end / struct..end / - ;; sig..end / for..done / while..done. `if`/`match`/ - ;; `try` don't have hard close tokens so we don't - ;; track them — their bodies are bounded by the - ;; surrounding expression structure. ((or (at-kw? "let") (at-kw? "begin") (at-kw? "struct") (at-kw? "sig") (at-kw? "for") (at-kw? "while")) (begin (set! depth (+ depth 1)) (advance-tok!) (step))) @@ -968,7 +1038,7 @@ (consume! "op" "=") (let ((expr-start (cur-pos))) (begin - (skip-to-boundary!) + (skip-let-rhs-boundary!) (let ((expr-src (slice src expr-start (cur-pos)))) (let ((expr (ocaml-parse expr-src))) (append! bindings (list nm ps expr)))))))))) diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 328791eb..8ad74a6f 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -214,10 +214,12 @@ SX CEK evaluator (both JS and OCaml hosts) eval-core, phase2-refs, phase2-loops, phase2-function, phase2-exn, phase3-adt, phase4-modules, phase5-hm, phase6-stdlib, let-and, phase1-params, misc), and emits `scoreboard.json` + `scoreboard.md`. -- [ ] Vendor a slice of the OCaml testsuite at `lib/ocaml/baseline/` - and feed it through `ocaml-run-program`, scoring per-file - conformance. _(Pending — needs more stdlib coverage and ADT type - decls to make most testsuite files runnable.)_ +- [~] Baseline OCaml programs at `lib/ocaml/baseline/` exercised through + `ocaml-run-program`. Currently 5/5: factorial.ml (recursion), + list_ops.ml (List.map + fold_left), option_match.ml (option + + pattern match), module_use.ml (module + ref + closure + + sequenced calls), sum_squares.ml (for-loop + ref). Real OCaml + testsuite vendoring is the next step. ### Phase 5 — Hindley-Milner type inference @@ -370,6 +372,15 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-08 Phase 5.1 — `lib/ocaml/baseline/` with five sample OCaml + programs (.ml files), driven by `lib/ocaml/baseline/run.sh` through + `ocaml-run-program (file-read F)`. All 5/5 pass: factorial, + list_ops, option_match, module_use (module + ref + closure + + sequenced calls), sum_squares (for-loop). To make module_use parse, + parser's `skip-let-rhs-boundary!` now lookaheads for a matching `in` + before any decl-keyword — distinguishes nested let-in from a new + top-level decl. Test 274 (`let x = 1 let y = 2`) still works because + its body has no inner `in`. - 2026-05-08 Phase 5 — HM with user `type` declarations (+6 tests, 363 total). `ocaml-hm-ctors` is now a mutable list cell; user type-defs register their constructors via `ocaml-hm-register-type-def!`. New