ocaml: phase 5.1 baseline OCaml programs (5/5 pass) + lookahead boundary
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 33s

lib/ocaml/baseline/{factorial,list_ops,option_match,module_use,sum_squares}.ml
exercised through ocaml-run-program (file-read F). lib/ocaml/baseline/
run.sh runs them and compares against expected.json — all 5 pass.

To make module_use.ml (with nested let-in) parse, parser's
skip-let-rhs-boundary! now uses has-matching-in? lookahead: a let at
depth 0 in a let-decl rhs opens a nested block IFF a matching in
exists before any decl-keyword. Without that in, the let is a new
top-level decl (preserves test 274 'let x = 1 let y = 2').

This is the first piece of Phase 5.1 'vendor a slice of OCaml
testsuite' — handcrafted fixtures for now, real testsuite TBD.
This commit is contained in:
2026-05-08 13:33:24 +00:00
parent 756d5fba64
commit de7be332c8
9 changed files with 217 additions and 17 deletions

View File

@@ -0,0 +1,7 @@
{
"factorial.ml": 3628800,
"list_ops.ml": 30,
"option_match.ml": 5,
"module_use.ml": 3,
"sum_squares.ml": 385
}

View File

@@ -0,0 +1,4 @@
(* Baseline: factorial via let-rec *)
let rec fact n =
if n = 0 then 1 else n * fact (n - 1) ;;
fact 10

View File

@@ -0,0 +1,5 @@
(* Baseline: List functions exercise *)
let xs = [1; 2; 3; 4; 5] ;;
let doubled = List.map (fun x -> x * 2) xs ;;
let total = List.fold_left (fun a b -> a + b) 0 doubled ;;
total

View File

@@ -0,0 +1,14 @@
(* Baseline: module declaration + use *)
module Counter = struct
let make () =
let n = ref 0 in
fun () ->
n := !n + 1 ;
!n
end ;;
let result =
let c = Counter.make () in
let _ = c () in
let _ = c () in
c () ;;
result

View File

@@ -0,0 +1,8 @@
(* Baseline: option type + pattern matching *)
let safe_div a b =
if b = 0 then None else Some (a / b) ;;
let result =
match safe_div 20 4 with
| None -> 0
| Some x -> x ;;
result

75
lib/ocaml/baseline/run.sh Executable file
View File

@@ -0,0 +1,75 @@
#!/usr/bin/env bash
# lib/ocaml/baseline/run.sh — run each baseline OCaml program through
# ocaml-run-program and compare to expected.json.
set -uo pipefail
cd "$(git rev-parse --show-toplevel)"
SX_SERVER="${SX_SERVER:-hosts/ocaml/_build/default/bin/sx_server.exe}"
if [ ! -x "$SX_SERVER" ]; then
SX_SERVER="/root/rose-ash/hosts/ocaml/_build/default/bin/sx_server.exe"
fi
PASS=0
FAIL=0
ERRORS=""
for f in lib/ocaml/baseline/*.ml; do
name=$(basename "$f")
expected=$(grep -oE "\"$name\"[[:space:]]*:[[:space:]]*[0-9-]+" lib/ocaml/baseline/expected.json | sed -E 's/.*:[[:space:]]*//')
if [ -z "$expected" ]; then
continue
fi
TMP=$(mktemp)
cat > "$TMP" << EPOCHS
(epoch 1)
(load "lib/guest/lex.sx")
(load "lib/guest/prefix.sx")
(load "lib/guest/pratt.sx")
(load "lib/ocaml/tokenizer.sx")
(load "lib/ocaml/parser.sx")
(load "lib/ocaml/eval.sx")
(load "lib/ocaml/runtime.sx")
(eval "(ocaml-load-stdlib!)")
(epoch 2)
(eval "(ocaml-run-program (file-read \\"$f\\"))")
EPOCHS
output=$(timeout 60 "$SX_SERVER" < "$TMP" 2>/dev/null | grep -E '^\(ok-len 2|^\(ok 2' | head -1)
rm -f "$TMP"
# Pull the next line which has the value
result=$(timeout 60 "$SX_SERVER" < <(cat <<EPOCHS
(epoch 1)
(load "lib/guest/lex.sx")
(load "lib/guest/prefix.sx")
(load "lib/guest/pratt.sx")
(load "lib/ocaml/tokenizer.sx")
(load "lib/ocaml/parser.sx")
(load "lib/ocaml/eval.sx")
(load "lib/ocaml/runtime.sx")
(eval "(ocaml-load-stdlib!)")
(epoch 2)
(eval "(ocaml-run-program (file-read \"$f\"))")
EPOCHS
) 2>/dev/null | awk '/^\(ok-len 2 / {getline; print; exit} /^\(ok 2 / {sub(/^\(ok 2 /, ""); sub(/\)$/, ""); print; exit}')
if [ "$result" = "$expected" ]; then
PASS=$((PASS + 1))
echo " ok $name$result"
else
FAIL=$((FAIL + 1))
ERRORS+=" FAIL $name expected=$expected got=$result
"
fi
done
TOTAL=$((PASS + FAIL))
if [ $FAIL -eq 0 ]; then
echo "ok $PASS/$TOTAL baseline OCaml programs run correctly"
else
echo "FAIL $PASS/$TOTAL baseline programs"
echo "$ERRORS"
fi
[ $FAIL -eq 0 ]

View File

@@ -0,0 +1,6 @@
(* Baseline: imperative loop summing squares *)
let total = ref 0 ;;
for i = 1 to 10 do
total := !total + i * i
done ;;
!total

View File

@@ -895,12 +895,88 @@
(fn
()
(let ((t (peek-tok))) (if (= t nil) (len src) (get t :pos)))))
;; skip-to-boundary! advances `idx` to the next top-level decl
;; boundary, tracking `let`/`begin`/`struct` etc. nesting so that
;; an inner `let ... in ...` doesn't terminate a top-level decl
;; body. Boundary tokens (when at depth 0):
;; ;; let module open include and type exception
;; Boundary at any depth: eof.
;; Two flavors of boundary skipping:
;;
;; * `skip-to-decl-boundary!` — used by parse-decl-expr. Stops
;; at the start of the next top-level decl: ;;, let, module,
;; open, include, and, type, exception, or eof.
;;
;; * `skip-let-rhs-boundary!` — used inside parse-decl-let after
;; the `=`. Treats `let` as the opener of a nested let..in
;; block (NOT a decl boundary), so `let f x = let y = 0 in y`
;; parses correctly. Boundary tokens (depth 0): ;;, module,
;; open, include, and, type, exception, or eof.
;; Lookahead: starting just past a `let` at the cursor, scan
;; for a matching `in` before the next decl boundary. Returns
;; true iff such an `in` exists — meaning the let is nested,
;; not a new decl.
(define has-matching-in?
(fn ()
(let ((p (+ idx 1)) (d 1) (result false) (done false))
(begin
(define scan
(fn ()
(when (not done)
(cond
((>= p tok-len) (set! done true))
(else
(let ((t (nth tokens p)))
(let ((tt (ocaml-tok-type t)) (tv (ocaml-tok-value t)))
(cond
((= tt "eof") (set! done true))
((and (= tt "op") (= tv ";;")) (set! done true))
((and (= tt "keyword") (= tv "module")) (set! done true))
((and (= tt "keyword") (= tv "type")) (set! done true))
((and (= tt "keyword") (= tv "exception")) (set! done true))
((and (= tt "keyword") (= tv "open")) (set! done true))
((and (= tt "keyword") (= tv "include")) (set! done true))
((and (= tt "keyword") (= tv "and")) (set! done true))
((and (= tt "keyword") (= tv "let"))
(begin (set! d (+ d 1)) (set! p (+ p 1)) (scan)))
((and (= tt "keyword") (= tv "in"))
(cond
((= d 1) (begin (set! result true) (set! done true)))
(else
(begin (set! d (- d 1)) (set! p (+ p 1)) (scan)))))
(else (begin (set! p (+ p 1)) (scan)))))))))))
(scan)
result))))
;; Same as skip-to-boundary but treats inner `let` as the start
;; of a nested let..in (open depth) IF a matching `in` exists
;; before any decl boundary; otherwise stops.
(define
skip-let-rhs-boundary!
(fn ()
(let ((depth 0))
(begin
(define step
(fn ()
(cond
((>= idx tok-len) nil)
((= (ocaml-tok-type (peek-tok)) "eof") nil)
((and (= depth 0) (at-op? ";;")) nil)
((and (= depth 0) (at-kw? "module")) nil)
((and (= depth 0) (at-kw? "open")) nil)
((and (= depth 0) (at-kw? "include")) nil)
((and (= depth 0) (at-kw? "and")) nil)
((and (= depth 0) (at-kw? "type")) nil)
((and (= depth 0) (at-kw? "exception")) nil)
((and (= depth 0) (at-kw? "let"))
(cond
((has-matching-in?)
(begin (set! depth (+ depth 1)) (advance-tok!) (step)))
(else nil)))
((or (at-kw? "let") (at-kw? "begin") (at-kw? "struct")
(at-kw? "sig") (at-kw? "for") (at-kw? "while"))
(begin (set! depth (+ depth 1)) (advance-tok!) (step)))
((or (at-kw? "in") (at-kw? "end") (at-kw? "done"))
(begin
(when (> depth 0) (set! depth (- depth 1)))
(advance-tok!) (step)))
(else (begin (advance-tok!) (step))))))
(step)))))
(define
skip-to-boundary!
(fn ()
@@ -919,12 +995,6 @@
((and (= depth 0) (at-kw? "and")) nil)
((and (= depth 0) (at-kw? "type")) nil)
((and (= depth 0) (at-kw? "exception")) nil)
;; Track nested blocks that have explicit closing
;; tokens. let..in / begin..end / struct..end /
;; sig..end / for..done / while..done. `if`/`match`/
;; `try` don't have hard close tokens so we don't
;; track them — their bodies are bounded by the
;; surrounding expression structure.
((or (at-kw? "let") (at-kw? "begin") (at-kw? "struct")
(at-kw? "sig") (at-kw? "for") (at-kw? "while"))
(begin (set! depth (+ depth 1)) (advance-tok!) (step)))
@@ -968,7 +1038,7 @@
(consume! "op" "=")
(let ((expr-start (cur-pos)))
(begin
(skip-to-boundary!)
(skip-let-rhs-boundary!)
(let ((expr-src (slice src expr-start (cur-pos))))
(let ((expr (ocaml-parse expr-src)))
(append! bindings (list nm ps expr))))))))))

View File

@@ -214,10 +214,12 @@ SX CEK evaluator (both JS and OCaml hosts)
eval-core, phase2-refs, phase2-loops, phase2-function, phase2-exn,
phase3-adt, phase4-modules, phase5-hm, phase6-stdlib, let-and,
phase1-params, misc), and emits `scoreboard.json` + `scoreboard.md`.
- [ ] Vendor a slice of the OCaml testsuite at `lib/ocaml/baseline/`
and feed it through `ocaml-run-program`, scoring per-file
conformance. _(Pending — needs more stdlib coverage and ADT type
decls to make most testsuite files runnable.)_
- [~] Baseline OCaml programs at `lib/ocaml/baseline/` exercised through
`ocaml-run-program`. Currently 5/5: factorial.ml (recursion),
list_ops.ml (List.map + fold_left), option_match.ml (option +
pattern match), module_use.ml (module + ref + closure +
sequenced calls), sum_squares.ml (for-loop + ref). Real OCaml
testsuite vendoring is the next step.
### Phase 5 — Hindley-Milner type inference
@@ -370,6 +372,15 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means:
_Newest first._
- 2026-05-08 Phase 5.1 — `lib/ocaml/baseline/` with five sample OCaml
programs (.ml files), driven by `lib/ocaml/baseline/run.sh` through
`ocaml-run-program (file-read F)`. All 5/5 pass: factorial,
list_ops, option_match, module_use (module + ref + closure +
sequenced calls), sum_squares (for-loop). To make module_use parse,
parser's `skip-let-rhs-boundary!` now lookaheads for a matching `in`
before any decl-keyword — distinguishes nested let-in from a new
top-level decl. Test 274 (`let x = 1 let y = 2`) still works because
its body has no inner `in`.
- 2026-05-08 Phase 5 — HM with user `type` declarations (+6 tests, 363
total). `ocaml-hm-ctors` is now a mutable list cell; user type-defs
register their constructors via `ocaml-hm-register-type-def!`. New