From 85b7fed4fc170f4ef086d6faad97bee1dbc2577b Mon Sep 17 00:00:00 2001 From: giles Date: Thu, 7 May 2026 23:04:40 +0000 Subject: [PATCH 001/298] =?UTF-8?q?ocaml:=20phase=201=20tokenizer=20(+58?= =?UTF-8?q?=20tests)=20=E2=80=94=20consumes=20lib/guest/lex.sx?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Idents, ctors, 51 keywords, numbers (int/float/hex/exp/underscored), strings + chars with escapes, type variables, 26 op/punct tokens, and nested (* ... *) block comments. Tests via epoch protocol against sx_server.exe. --- lib/ocaml/test.sh | 290 +++++++++++++++++++++++++++ lib/ocaml/tests/tokenize.sx | 16 ++ lib/ocaml/tokenizer.sx | 382 ++++++++++++++++++++++++++++++++++++ plans/ocaml-on-sx.md | 16 +- 4 files changed, 699 insertions(+), 5 deletions(-) create mode 100755 lib/ocaml/test.sh create mode 100644 lib/ocaml/tests/tokenize.sx create mode 100644 lib/ocaml/tokenizer.sx diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh new file mode 100755 index 00000000..2750fa16 --- /dev/null +++ b/lib/ocaml/test.sh @@ -0,0 +1,290 @@ +#!/usr/bin/env bash +# Fast OCaml-on-SX test runner — epoch protocol direct to sx_server.exe. +# Mirrors lib/lua/test.sh. +# +# Usage: +# bash lib/ocaml/test.sh # run all tests +# bash lib/ocaml/test.sh -v # verbose + +set -uo pipefail +cd "$(git rev-parse --show-toplevel)" + +SX_SERVER="${SX_SERVER:-hosts/ocaml/_build/default/bin/sx_server.exe}" +if [ ! -x "$SX_SERVER" ]; then + SX_SERVER="/root/rose-ash/hosts/ocaml/_build/default/bin/sx_server.exe" +fi +if [ ! -x "$SX_SERVER" ]; then + echo "ERROR: sx_server.exe not found. Run: cd hosts/ocaml && dune build" + exit 1 +fi + +VERBOSE="${1:-}" +PASS=0 +FAIL=0 +ERRORS="" +TMPFILE=$(mktemp) +trap "rm -f $TMPFILE" EXIT + +cat > "$TMPFILE" << 'EPOCHS' +(epoch 1) +(load "lib/guest/lex.sx") +(load "lib/guest/prefix.sx") +(load "lib/ocaml/tokenizer.sx") +(load "lib/ocaml/tests/tokenize.sx") + +;; ── empty / eof ──────────────────────────────────────────────── +(epoch 100) +(eval "(ocaml-test-tok-count \"\")") +(epoch 101) +(eval "(ocaml-test-tok-type \"\" 0)") + +;; ── numbers ──────────────────────────────────────────────────── +(epoch 110) +(eval "(ocaml-test-tok-type \"42\" 0)") +(epoch 111) +(eval "(ocaml-test-tok-value \"42\" 0)") +(epoch 112) +(eval "(ocaml-test-tok-value \"3.14\" 0)") +(epoch 113) +(eval "(ocaml-test-tok-value \"0xff\" 0)") +(epoch 114) +(eval "(ocaml-test-tok-value \"1e3\" 0)") +(epoch 115) +(eval "(ocaml-test-tok-value \"1_000_000\" 0)") +(epoch 116) +(eval "(ocaml-test-tok-value \"3.14e-2\" 0)") + +;; ── identifiers / constructors / keywords ───────────────────── +(epoch 120) +(eval "(ocaml-test-tok-type \"foo\" 0)") +(epoch 121) +(eval "(ocaml-test-tok-value \"foo_bar1\" 0)") +(epoch 122) +(eval "(ocaml-test-tok-type \"Some\" 0)") +(epoch 123) +(eval "(ocaml-test-tok-value \"Some\" 0)") +(epoch 124) +(eval "(ocaml-test-tok-type \"let\" 0)") +(epoch 125) +(eval "(ocaml-test-tok-value \"match\" 0)") +(epoch 126) +(eval "(ocaml-test-tok-type \"true\" 0)") +(epoch 127) +(eval "(ocaml-test-tok-value \"false\" 0)") +(epoch 128) +(eval "(ocaml-test-tok-value \"name'\" 0)") + +;; ── strings ──────────────────────────────────────────────────── +(epoch 130) +(eval "(ocaml-test-tok-type \"\\\"hi\\\"\" 0)") +(epoch 131) +(eval "(ocaml-test-tok-value \"\\\"hi\\\"\" 0)") +(epoch 132) +(eval "(ocaml-test-tok-value \"\\\"a\\\\nb\\\"\" 0)") + +;; ── chars ────────────────────────────────────────────────────── +(epoch 140) +(eval "(ocaml-test-tok-type \"'a'\" 0)") +(epoch 141) +(eval "(ocaml-test-tok-value \"'a'\" 0)") +(epoch 142) +(eval "(ocaml-test-tok-value \"'\\\\n'\" 0)") + +;; ── type variables ───────────────────────────────────────────── +(epoch 145) +(eval "(ocaml-test-tok-type \"'a\" 0)") +(epoch 146) +(eval "(ocaml-test-tok-value \"'a\" 0)") + +;; ── multi-char operators ─────────────────────────────────────── +(epoch 150) +(eval "(ocaml-test-tok-value \"->\" 0)") +(epoch 151) +(eval "(ocaml-test-tok-value \"|>\" 0)") +(epoch 152) +(eval "(ocaml-test-tok-value \"<-\" 0)") +(epoch 153) +(eval "(ocaml-test-tok-value \":=\" 0)") +(epoch 154) +(eval "(ocaml-test-tok-value \"::\" 0)") +(epoch 155) +(eval "(ocaml-test-tok-value \";;\" 0)") +(epoch 156) +(eval "(ocaml-test-tok-value \"@@\" 0)") +(epoch 157) +(eval "(ocaml-test-tok-value \"<>\" 0)") +(epoch 158) +(eval "(ocaml-test-tok-value \"&&\" 0)") +(epoch 159) +(eval "(ocaml-test-tok-value \"||\" 0)") + +;; ── single-char punctuation ──────────────────────────────────── +(epoch 160) +(eval "(ocaml-test-tok-value \"+\" 0)") +(epoch 161) +(eval "(ocaml-test-tok-value \"|\" 0)") +(epoch 162) +(eval "(ocaml-test-tok-value \";\" 0)") +(epoch 163) +(eval "(ocaml-test-tok-value \"(\" 0)") +(epoch 164) +(eval "(ocaml-test-tok-value \"!\" 0)") +(epoch 165) +(eval "(ocaml-test-tok-value \"@\" 0)") + +;; ── comments ─────────────────────────────────────────────────── +(epoch 170) +(eval "(ocaml-test-tok-count \"(* hi *)\")") +(epoch 171) +(eval "(ocaml-test-tok-value \"(* c *) 42\" 0)") +(epoch 172) +(eval "(ocaml-test-tok-count \"(* outer (* inner *) end *) 1\")") +(epoch 173) +(eval "(ocaml-test-tok-value \"(* outer (* inner *) end *) 1\" 0)") + +;; ── compound expressions ─────────────────────────────────────── +(epoch 180) +(eval "(ocaml-test-tok-count \"let x = 1\")") +(epoch 181) +(eval "(ocaml-test-tok-type \"let x = 1\" 0)") +(epoch 182) +(eval "(ocaml-test-tok-value \"let x = 1\" 0)") +(epoch 183) +(eval "(ocaml-test-tok-type \"let x = 1\" 1)") +(epoch 184) +(eval "(ocaml-test-tok-value \"let x = 1\" 2)") +(epoch 185) +(eval "(ocaml-test-tok-value \"let x = 1\" 3)") + +(epoch 190) +(eval "(ocaml-test-tok-count \"match x with | None -> 0 | Some y -> y\")") +(epoch 191) +(eval "(ocaml-test-tok-value \"fun x -> x + 1\" 2)") +(epoch 192) +(eval "(ocaml-test-tok-type \"fun x -> x + 1\" 2)") +(epoch 193) +(eval "(ocaml-test-tok-type \"Some 42\" 0)") +(epoch 194) +(eval "(ocaml-test-tok-value \"a |> f |> g\" 1)") +(epoch 195) +(eval "(ocaml-test-tok-value \"x := !y\" 1)") + +EPOCHS + +OUTPUT=$(timeout 60 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) + +check() { + local epoch="$1" desc="$2" expected="$3" + local actual + actual=$(echo "$OUTPUT" | grep -A1 "^(ok-len $epoch " | tail -1) + if [ -z "$actual" ]; then + actual=$(echo "$OUTPUT" | grep "^(ok $epoch " || true) + fi + if [ -z "$actual" ]; then + actual=$(echo "$OUTPUT" | grep "^(error $epoch " || true) + fi + if [ -z "$actual" ]; then + actual="" + fi + + if echo "$actual" | grep -qF -- "$expected"; then + PASS=$((PASS + 1)) + [ "$VERBOSE" = "-v" ] && echo " ok $desc" + else + FAIL=$((FAIL + 1)) + ERRORS+=" FAIL $desc (epoch $epoch) + expected: $expected + actual: $actual +" + fi +} + +# empty / eof +check 100 "empty tokens length" '1' +check 101 "empty first is eof" '"eof"' + +# numbers +check 110 "int type" '"number"' +check 111 "int value" '42' +check 112 "float value" '3.14' +check 113 "hex value" '255' +check 114 "exponent" '1000' +check 115 "underscored int" '1000000' +check 116 "neg exponent" '0.0314' + +# idents / ctors / keywords +check 120 "ident type" '"ident"' +check 121 "ident value" '"foo_bar1"' +check 122 "ctor type" '"ctor"' +check 123 "ctor value" '"Some"' +check 124 "let keyword type" '"keyword"' +check 125 "match keyword value" '"match"' +check 126 "true is keyword" '"keyword"' +check 127 "false value" '"false"' +check 128 "primed ident" "\"name'\"" + +# strings +check 130 "string type" '"string"' +check 131 "string value" '"hi"' +check 132 "escape sequence" '"a' + +# chars +check 140 "char type" '"char"' +check 141 "char value" '"a"' +check 142 "char escape" '"' + +# tyvars +check 145 "tyvar type" '"tyvar"' +check 146 "tyvar value" '"a"' + +# multi-char ops +check 150 "->" '"->"' +check 151 "|>" '"|>"' +check 152 "<-" '"<-"' +check 153 ":=" '":="' +check 154 "::" '"::"' +check 155 ";;" '";;"' +check 156 "@@" '"@@"' +check 157 "<>" '"<>"' +check 158 "&&" '"&&"' +check 159 "||" '"||"' + +# single ops +check 160 "+" '"+"' +check 161 "|" '"|"' +check 162 ";" '";"' +check 163 "(" '"("' +check 164 "!" '"!"' +check 165 "@" '"@"' + +# comments +check 170 "block comment alone -> eof" '1' +check 171 "num after block comment" '42' +check 172 "nested comment count" '2' +check 173 "nested comment value" '1' + +# compound +check 180 "let x = 1 count" '5' +check 181 "let is keyword" '"keyword"' +check 182 "let value" '"let"' +check 183 "x is ident" '"ident"' +check 184 "= value" '"="' +check 185 "1 value" '1' + +check 190 "match expr count" '13' +check 191 "fun -> arrow value" '"->"' +check 192 "fun -> arrow type" '"op"' +check 193 "Some is ctor" '"ctor"' +check 194 "first |> value" '"|>"' +check 195 "ref assign :=" '":="' + +TOTAL=$((PASS + FAIL)) +if [ $FAIL -eq 0 ]; then + echo "ok $PASS/$TOTAL OCaml-on-SX tokenizer tests passed" +else + echo "FAIL $PASS/$TOTAL passed, $FAIL failed:" + echo "" + echo "$ERRORS" +fi + +[ $FAIL -eq 0 ] diff --git a/lib/ocaml/tests/tokenize.sx b/lib/ocaml/tests/tokenize.sx new file mode 100644 index 00000000..cdf8955a --- /dev/null +++ b/lib/ocaml/tests/tokenize.sx @@ -0,0 +1,16 @@ +;; lib/ocaml/tests/tokenize.sx — smoke tests for the OCaml tokenizer. +;; +;; Tests are exercised via lib/ocaml/test.sh, which drives sx_server.exe +;; over the epoch protocol. This file provides a small evaluator that +;; returns short diagnostic values for each fixture so the runner can +;; grep them out of one batched run. + +(define + ocaml-test-tok-type + (fn (src i) (get (nth (ocaml-tokenize src) i) :type))) + +(define + ocaml-test-tok-value + (fn (src i) (get (nth (ocaml-tokenize src) i) :value))) + +(define ocaml-test-tok-count (fn (src) (len (ocaml-tokenize src)))) diff --git a/lib/ocaml/tokenizer.sx b/lib/ocaml/tokenizer.sx new file mode 100644 index 00000000..d3882aab --- /dev/null +++ b/lib/ocaml/tokenizer.sx @@ -0,0 +1,382 @@ +;; lib/ocaml/tokenizer.sx — OCaml lexer. +;; +;; Tokens: ident, ctor (uppercase ident), keyword, number, string, char, op, eof. +;; Token shape: {:type :value :pos} via lex-make-token. +;; OCaml is not indentation-sensitive — no layout pass. +;; Block comments (* ... *) nest. There is no line-comment syntax. + +(prefix-rename + "ocaml-" + (quote + ((make-token lex-make-token) + (digit? lex-digit?) + (hex-digit? lex-hex-digit?) + (alpha? lex-alpha?) + (alnum? lex-alnum?) + (ident-start? lex-ident-start?) + (ident-char? lex-ident-char?) + (ws? lex-whitespace?)))) + +(define + ocaml-keywords + (list + "and" + "as" + "assert" + "begin" + "class" + "constraint" + "do" + "done" + "downto" + "else" + "end" + "exception" + "external" + "false" + "for" + "fun" + "function" + "functor" + "if" + "in" + "include" + "inherit" + "initializer" + "lazy" + "let" + "match" + "method" + "module" + "mutable" + "new" + "nonrec" + "object" + "of" + "open" + "or" + "private" + "rec" + "sig" + "struct" + "then" + "to" + "true" + "try" + "type" + "val" + "virtual" + "when" + "while" + "with" + "land" + "lor" + "lxor" + "lsl" + "lsr" + "asr" + "mod")) + +(define ocaml-keyword? (fn (word) (contains? ocaml-keywords word))) + +(define + ocaml-upper? + (fn (c) (and (not (= c nil)) (>= c "A") (<= c "Z")))) + +(define + ocaml-tokenize + (fn + (src) + (let + ((tokens (list)) (pos 0) (src-len (len src))) + (define + ocaml-peek + (fn + (offset) + (if (< (+ pos offset) src-len) (nth src (+ pos offset)) nil))) + (define cur (fn () (ocaml-peek 0))) + (define advance! (fn (n) (set! pos (+ pos n)))) + (define + push! + (fn + (type value start) + (append! tokens (ocaml-make-token type value start)))) + (define + skip-block-comment! + (fn + (depth) + (cond + ((>= pos src-len) nil) + ((and (= (cur) "*") (= (ocaml-peek 1) ")")) + (begin + (advance! 2) + (when + (> depth 1) + (skip-block-comment! (- depth 1))))) + ((and (= (cur) "(") (= (ocaml-peek 1) "*")) + (begin + (advance! 2) + (skip-block-comment! (+ depth 1)))) + (else (begin (advance! 1) (skip-block-comment! depth)))))) + (define + skip-ws! + (fn + () + (cond + ((>= pos src-len) nil) + ((ocaml-ws? (cur)) (begin (advance! 1) (skip-ws!))) + ((and (= (cur) "(") (= (ocaml-peek 1) "*")) + (begin + (advance! 2) + (skip-block-comment! 1) + (skip-ws!))) + (else nil)))) + (define + read-ident + (fn + (start) + (begin + (when + (and (< pos src-len) (ocaml-ident-char? (cur))) + (begin (advance! 1) (read-ident start))) + (when + (and (< pos src-len) (= (cur) "'")) + (begin (advance! 1) (read-ident start))) + (slice src start pos)))) + (define + read-decimal-digits! + (fn + () + (when + (and (< pos src-len) (or (ocaml-digit? (cur)) (= (cur) "_"))) + (begin (advance! 1) (read-decimal-digits!))))) + (define + read-hex-digits! + (fn + () + (when + (and + (< pos src-len) + (or (ocaml-hex-digit? (cur)) (= (cur) "_"))) + (begin (advance! 1) (read-hex-digits!))))) + (define + read-exp-part! + (fn + () + (when + (and (< pos src-len) (or (= (cur) "e") (= (cur) "E"))) + (let + ((p1 (ocaml-peek 1))) + (when + (or + (and (not (= p1 nil)) (ocaml-digit? p1)) + (and + (or (= p1 "+") (= p1 "-")) + (< (+ pos 2) src-len) + (ocaml-digit? (ocaml-peek 2)))) + (begin + (advance! 1) + (when + (and + (< pos src-len) + (or (= (cur) "+") (= (cur) "-"))) + (advance! 1)) + (read-decimal-digits!))))))) + (define + strip-underscores + (fn + (s) + (let + ((out (list)) (i 0) (n (len s))) + (begin + (define + loop + (fn + () + (when + (< i n) + (begin + (when + (not (= (nth s i) "_")) + (append! out (nth s i))) + (set! i (+ i 1)) + (loop))))) + (loop) + (join "" out))))) + (define + read-number + (fn + (start) + (cond + ((and (= (cur) "0") (< (+ pos 1) src-len) (or (= (ocaml-peek 1) "x") (= (ocaml-peek 1) "X"))) + (begin + (advance! 2) + (read-hex-digits!) + (let + ((raw (slice src (+ start 2) pos))) + (parse-number (str "0x" (strip-underscores raw)))))) + (else + (begin + (read-decimal-digits!) + (when + (and + (< pos src-len) + (= (cur) ".") + (or + (>= (+ pos 1) src-len) + (not (= (ocaml-peek 1) ".")))) + (begin (advance! 1) (read-decimal-digits!))) + (read-exp-part!) + (parse-number (strip-underscores (slice src start pos)))))))) + (define + read-string-literal + (fn + () + (let + ((chars (list))) + (begin + (advance! 1) + (define + loop + (fn + () + (cond + ((>= pos src-len) nil) + ((= (cur) "\\") + (begin + (advance! 1) + (when + (< pos src-len) + (let + ((ch (cur))) + (begin + (cond + ((= ch "n") (append! chars "\n")) + ((= ch "t") (append! chars "\t")) + ((= ch "r") (append! chars "\r")) + ((= ch "b") (append! chars "\\b")) + ((= ch "\\") (append! chars "\\")) + ((= ch "'") (append! chars "'")) + ((= ch "\"") (append! chars "\"")) + ((= ch " ") nil) + (else (append! chars ch))) + (advance! 1)))) + (loop))) + ((= (cur) "\"") (advance! 1)) + (else + (begin + (append! chars (cur)) + (advance! 1) + (loop)))))) + (loop) + (join "" chars))))) + (define + read-char-literal + (fn + () + (begin + (advance! 1) + (let + ((value (cond ((= (cur) "\\") (begin (advance! 1) (let ((ch (cur))) (begin (advance! 1) (cond ((= ch "n") "\n") ((= ch "t") "\t") ((= ch "r") "\r") ((= ch "b") "\\b") ((= ch "\\") "\\") ((= ch "'") "'") ((= ch "\"") "\"") (else ch)))))) (else (let ((ch (cur))) (begin (advance! 1) ch)))))) + (begin + (when + (and (< pos src-len) (= (cur) "'")) + (advance! 1)) + value))))) + (define + try-punct + (fn + (start) + (let + ((c (cur)) + (c1 (ocaml-peek 1)) + (c2 (ocaml-peek 2))) + (cond + ((and (= c ";") (= c1 ";")) + (begin (advance! 2) (push! "op" ";;" start) true)) + ((and (= c "-") (= c1 ">")) + (begin (advance! 2) (push! "op" "->" start) true)) + ((and (= c "<") (= c1 "-")) + (begin (advance! 2) (push! "op" "<-" start) true)) + ((and (= c ":") (= c1 "=")) + (begin (advance! 2) (push! "op" ":=" start) true)) + ((and (= c ":") (= c1 ":")) + (begin (advance! 2) (push! "op" "::" start) true)) + ((and (= c "|") (= c1 "|")) + (begin (advance! 2) (push! "op" "||" start) true)) + ((and (= c "&") (= c1 "&")) + (begin (advance! 2) (push! "op" "&&" start) true)) + ((and (= c "<") (= c1 "=")) + (begin (advance! 2) (push! "op" "<=" start) true)) + ((and (= c ">") (= c1 "=")) + (begin (advance! 2) (push! "op" ">=" start) true)) + ((and (= c "<") (= c1 ">")) + (begin (advance! 2) (push! "op" "<>" start) true)) + ((and (= c "=") (= c1 "=")) + (begin (advance! 2) (push! "op" "==" start) true)) + ((and (= c "!") (= c1 "=")) + (begin (advance! 2) (push! "op" "!=" start) true)) + ((and (= c "|") (= c1 ">")) + (begin (advance! 2) (push! "op" "|>" start) true)) + ((and (= c "<") (= c1 "|")) + (begin (advance! 2) (push! "op" "<|" start) true)) + ((and (= c "@") (= c1 "@")) + (begin (advance! 2) (push! "op" "@@" start) true)) + ((and (= c "*") (= c1 "*")) + (begin (advance! 2) (push! "op" "**" start) true)) + ((or (= c "+") (= c "-") (= c "*") (= c "/") (= c "%") (= c "^") (= c "<") (= c ">") (= c "=") (= c "(") (= c ")") (= c "{") (= c "}") (= c "[") (= c "]") (= c ";") (= c ":") (= c ",") (= c ".") (= c "|") (= c "!") (= c "&") (= c "@") (= c "?") (= c "~") (= c "#")) + (begin (advance! 1) (push! "op" c start) true)) + (else false))))) + (define + step + (fn + () + (begin + (skip-ws!) + (when + (< pos src-len) + (let + ((start pos) (c (cur))) + (cond + ((ocaml-ident-start? c) + (let + ((word (read-ident start))) + (begin + (cond + ((ocaml-keyword? word) + (push! "keyword" word start)) + ((ocaml-upper? c) (push! "ctor" word start)) + (else (push! "ident" word start))) + (step)))) + ((ocaml-digit? c) + (let + ((v (read-number start))) + (begin (push! "number" v start) (step)))) + ((= c "\"") + (let + ((s (read-string-literal))) + (begin (push! "string" s start) (step)))) + ((and (= c "'") (< (+ pos 1) src-len) (or (and (= (ocaml-peek 1) "\\") (< (+ pos 3) src-len) (= (ocaml-peek 3) "'")) (and (not (= (ocaml-peek 1) "\\")) (< (+ pos 2) src-len) (= (ocaml-peek 2) "'")))) + (let + ((v (read-char-literal))) + (begin (push! "char" v start) (step)))) + ((= c "'") + (begin + (advance! 1) + (when + (and (< pos src-len) (ocaml-ident-start? (cur))) + (begin + (advance! 1) + (read-ident (+ start 1)))) + (push! + "tyvar" + (slice src (+ start 1) pos) + start) + (step))) + ((try-punct start) (step)) + (else + (error + (str "ocaml-tokenize: unexpected char " c " at " pos))))))))) + (step) + (push! "eof" nil pos) + tokens))) diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 7db06023..e16759aa 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -116,14 +116,15 @@ SX CEK evaluator (both JS and OCaml hosts) ### Phase 1 — Tokenizer + parser -- [ ] **Tokenizer:** keywords (`let`, `rec`, `in`, `fun`, `function`, `match`, `with`, +- [x] **Tokenizer:** keywords (`let`, `rec`, `in`, `fun`, `function`, `match`, `with`, `type`, `of`, `module`, `struct`, `end`, `functor`, `sig`, `open`, `include`, `if`, `then`, `else`, `begin`, `try`, `exception`, `raise`, `mutable`, `for`, `while`, `do`, `done`, `and`, `as`, `when`), operators (`->`, `|>`, `<|`, `@@`, `@`, `:=`, `!`, `::`, `**`, `:`, `;`, `;;`), identifiers (lower, - upper/ctor, labels `~label:`, optional `?label:`), char literals `'c'`, - string literals (escaped + heredoc `{|...|}`), int/float literals, - line comments `(*` nested block comments `*)`. + upper/ctor), char literals `'c'`, string literals (escaped), + int/float literals (incl. hex, exponent, underscores), nested block + comments `(* ... *)`. _(labels `~label:` / `?label:` and heredoc `{|...|}` + deferred — surface tokens already work via `~`/`?` punct + `{`/`|` punct.)_ - [ ] **Parser:** top-level `let`/`let rec`/`type`/`module`/`exception`/`open`/`include` declarations; expressions: literals, identifiers, constructor application, lambda, application (left-assoc), binary ops with precedence table, @@ -308,7 +309,12 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ -_(awaiting phase 1)_ +- 2026-05-07 Phase 1 — `lib/ocaml/tokenizer.sx` consuming `lib/guest/lex.sx` + via `prefix-rename`. Covers idents, ctors, 51 keywords, numbers (int / float + / hex / exponent / underscored), strings (with escapes), chars (with escapes), + type variables (`'a`), nested block comments, and 26 operator/punct tokens + (incl. `->` `|>` `<-` `:=` `::` `;;` `@@` `<>` `&&` `||` `**` etc.). 58/58 + tokenizer tests pass via `lib/ocaml/test.sh` driving `sx_server.exe`. ## Blockers From 9a090c6e42fedc38466dc38418cf8e92251ae6a2 Mon Sep 17 00:00:00 2001 From: giles Date: Thu, 7 May 2026 23:26:48 +0000 Subject: [PATCH 002/298] =?UTF-8?q?ocaml:=20phase=201=20expression=20parse?= =?UTF-8?q?r=20(+37=20tests,=2095=20total)=20=E2=80=94=20consumes=20lib/gu?= =?UTF-8?q?est/pratt.sx?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Atoms (literals/var/con/unit/list), application (left-assoc), prefix - / not, 29-op precedence table via pratt-op-lookup (incl. keyword-spelled mod/land/ lor/lxor/lsl/lsr/asr), tuples, parens, if/then/else, fun, let, let rec with function shorthand. AST follows Haskell-on-SX (:int / :op / :fun / etc). --- lib/ocaml/parser.sx | 418 ++++++++++++++++++++++++++++++++++++ lib/ocaml/test.sh | 138 +++++++++++- lib/ocaml/tests/tokenize.sx | 13 +- plans/ocaml-on-sx.md | 21 +- 4 files changed, 579 insertions(+), 11 deletions(-) create mode 100644 lib/ocaml/parser.sx diff --git a/lib/ocaml/parser.sx b/lib/ocaml/parser.sx new file mode 100644 index 00000000..5201d17c --- /dev/null +++ b/lib/ocaml/parser.sx @@ -0,0 +1,418 @@ +;; lib/ocaml/parser.sx — OCaml expression parser. +;; +;; Input: token list from (ocaml-tokenize src). +;; Output: an OCaml AST. Nodes are plain lists tagged by a keyword head; +;; keywords serialize to their string name so `(list :var "x")` is the +;; same value as `(list "var" "x")` at runtime. +;; +;; Scope (this iteration — expressions only): +;; atoms int/float/string/char, true/false, unit (), var, con, list literal +;; application left-associative, f x y z +;; prefix -E unary minus, not E +;; infix standard ops via lib/guest/pratt.sx table +;; tuple a, b, c (lower than infix, higher than let/if) +;; parens (e) +;; if if c then t else e (else optional → unit) +;; fun fun x y -> body +;; let let x = e in body (no rec) +;; let f x y = e in body (function shorthand) +;; let rec f x = e in body +;; +;; AST shapes: +;; (:int N) (:float N) (:string S) (:char C) (:bool B) (:unit) +;; (:var NAME) (:con NAME) +;; (:app FN ARG) — binary, chain for multi-arg +;; (:op OP LHS RHS) — binary infix; OP is the source string +;; (:neg E) (:not E) +;; (:tuple ITEMS) +;; (:list ITEMS) +;; (:if C T E) +;; (:fun PARAMS BODY) — PARAMS list of strings (idents) +;; (:let NAME PARAMS EXPR BODY) +;; (:let-rec NAME PARAMS EXPR BODY) + +(define ocaml-tok-type (fn (t) (if (= t nil) "eof" (get t :type)))) + +(define ocaml-tok-value (fn (t) (if (= t nil) nil (get t :value)))) + +;; Standard OCaml binary operator table. +;; Higher precedence = tighter binding. +;; ASSOC is :left or :right. +(define + ocaml-op-table + (list + (list "||" 2 :right) + (list "or" 2 :right) + (list "&&" 3 :right) + (list "&" 3 :right) + (list "=" 4 :left) + (list "<" 4 :left) + (list ">" 4 :left) + (list "<=" 4 :left) + (list ">=" 4 :left) + (list "<>" 4 :left) + (list "==" 4 :left) + (list "!=" 4 :left) + (list "|>" 4 :left) + (list "@" 5 :right) + (list "^" 5 :right) + (list "::" 6 :right) + (list "+" 7 :left) + (list "-" 7 :left) + (list "*" 8 :left) + (list "/" 8 :left) + (list "%" 8 :left) + (list "mod" 8 :left) + (list "land" 8 :left) + (list "lor" 8 :left) + (list "lxor" 8 :left) + (list "**" 9 :right) + (list "lsl" 9 :right) + (list "lsr" 9 :right) + (list "asr" 9 :right))) + +(define + ocaml-binop-prec + (fn + (op) + (let + ((entry (pratt-op-lookup ocaml-op-table op))) + (if (= entry nil) 0 (pratt-op-prec entry))))) + +(define + ocaml-binop-right? + (fn + (op) + (let + ((entry (pratt-op-lookup ocaml-op-table op))) + (and (not (= entry nil)) (= (pratt-op-assoc entry) :right))))) + +;; Some OCaml binops are spelled with keyword tokens (mod / land / lor / +;; lxor / lsl / lsr / asr / or). Recognise both shapes. +(define + ocaml-tok-is-binop? + (fn + (tok) + (let + ((tt (ocaml-tok-type tok)) (tv (ocaml-tok-value tok))) + (cond + ((= tt "op") (not (= (ocaml-binop-prec tv) 0))) + ((= tt "keyword") (not (= (ocaml-binop-prec tv) 0))) + (else false))))) + +(define + ocaml-parse + (fn + (src) + (let + ((tokens (ocaml-tokenize src)) (idx 0) (tok-len 0)) + (begin + (set! tok-len (len tokens)) + (define peek-tok (fn () (nth tokens idx))) + (define advance-tok! (fn () (set! idx (+ idx 1)))) + (define + check-tok? + (fn + (type value) + (let + ((t (peek-tok))) + (and + (= (ocaml-tok-type t) type) + (or (= value nil) (= (ocaml-tok-value t) value)))))) + (define + consume! + (fn + (type value) + (if + (check-tok? type value) + (let ((t (peek-tok))) (begin (advance-tok!) t)) + (error + (str + "ocaml-parse: expected " + type + " " + value + " got " + (ocaml-tok-type (peek-tok)) + " " + (ocaml-tok-value (peek-tok))))))) + (define at-kw? (fn (kw) (check-tok? "keyword" kw))) + (define at-op? (fn (op) (check-tok? "op" op))) + (define parse-expr (fn () nil)) + (define parse-tuple (fn () nil)) + (define parse-binop-rhs (fn (lhs min-prec) lhs)) + (define parse-prefix (fn () nil)) + (define parse-app (fn () nil)) + (define parse-atom (fn () nil)) + (set! + parse-atom + (fn + () + (let + ((t (peek-tok)) + (tt (ocaml-tok-type (peek-tok))) + (tv (ocaml-tok-value (peek-tok)))) + (cond + ((= tt "number") + (begin + (advance-tok!) + (if (= (round tv) tv) (list :int tv) (list :float tv)))) + ((= tt "string") (begin (advance-tok!) (list :string tv))) + ((= tt "char") (begin (advance-tok!) (list :char tv))) + ((and (= tt "keyword") (= tv "true")) + (begin (advance-tok!) (list :bool true))) + ((and (= tt "keyword") (= tv "false")) + (begin (advance-tok!) (list :bool false))) + ((= tt "ident") (begin (advance-tok!) (list :var tv))) + ((= tt "ctor") (begin (advance-tok!) (list :con tv))) + ((and (= tt "op") (= tv "(")) + (begin + (advance-tok!) + (cond + ((at-op? ")") (begin (advance-tok!) (list :unit))) + (else + (let + ((e (parse-expr))) + (begin (consume! "op" ")") e)))))) + ((and (= tt "op") (= tv "[")) + (begin + (advance-tok!) + (cond + ((at-op? "]") (begin (advance-tok!) (list :list))) + (else + (let + ((items (list))) + (begin + (append! items (parse-expr)) + (define + loop + (fn + () + (when + (at-op? ";") + (begin + (advance-tok!) + (when + (not (at-op? "]")) + (begin + (append! items (parse-expr)) + (loop))))))) + (loop) + (consume! "op" "]") + (cons :list items))))))) + ((at-kw? "begin") + (begin + (advance-tok!) + (let + ((e (parse-expr))) + (begin (consume! "keyword" "end") e)))) + (else + (error + (str + "ocaml-parse: unexpected token " + tt + " " + tv + " at idx " + idx))))))) + (define + at-app-start? + (fn + () + (let + ((tt (ocaml-tok-type (peek-tok))) + (tv (ocaml-tok-value (peek-tok)))) + (cond + ((= tt "number") true) + ((= tt "string") true) + ((= tt "char") true) + ((= tt "ident") true) + ((= tt "ctor") true) + ((and (= tt "keyword") (or (= tv "true") (= tv "false") (= tv "begin"))) + true) + ((and (= tt "op") (or (= tv "(") (= tv "["))) true) + (else false))))) + (set! + parse-app + (fn + () + (let + ((head (parse-atom))) + (begin + (define + loop + (fn + () + (when + (at-app-start?) + (let + ((arg (parse-atom))) + (begin (set! head (list :app head arg)) (loop)))))) + (loop) + head)))) + (set! + parse-prefix + (fn + () + (cond + ((at-op? "-") + (begin (advance-tok!) (list :neg (parse-prefix)))) + ((at-kw? "not") + (begin (advance-tok!) (list :not (parse-prefix)))) + (else (parse-app))))) + (set! + parse-binop-rhs + (fn + (lhs min-prec) + (let + ((tok (peek-tok))) + (cond + ((not (ocaml-tok-is-binop? tok)) lhs) + (else + (let + ((op (ocaml-tok-value tok)) + (prec (ocaml-binop-prec (ocaml-tok-value tok)))) + (cond + ((< prec min-prec) lhs) + (else + (begin + (advance-tok!) + (let + ((rhs (parse-prefix)) + (next-min + (if + (ocaml-binop-right? op) + prec + (+ prec 1)))) + (begin + (set! rhs (parse-binop-rhs rhs next-min)) + (parse-binop-rhs (list :op op lhs rhs) min-prec)))))))))))) + (define + parse-binary + (fn + () + (let ((lhs (parse-prefix))) (parse-binop-rhs lhs 1)))) + (set! + parse-tuple + (fn + () + (let + ((first (parse-binary))) + (cond + ((at-op? ",") + (let + ((items (list first))) + (begin + (define + loop + (fn + () + (when + (at-op? ",") + (begin + (advance-tok!) + (append! items (parse-binary)) + (loop))))) + (loop) + (cons :tuple items)))) + (else first))))) + (define + parse-fun + (fn + () + (let + ((params (list))) + (begin + (define + collect-params + (fn + () + (when + (check-tok? "ident" nil) + (begin + (append! params (ocaml-tok-value (peek-tok))) + (advance-tok!) + (collect-params))))) + (collect-params) + (when + (= (len params) 0) + (error "ocaml-parse: fun expects at least one parameter")) + (consume! "op" "->") + (let ((body (parse-expr))) (list :fun params body)))))) + (define + parse-let + (fn + () + (let + ((reccy false)) + (begin + (when + (at-kw? "rec") + (begin (advance-tok!) (set! reccy true))) + (let + ((name (ocaml-tok-value (consume! "ident" nil))) + (params (list))) + (begin + (define + collect-params + (fn + () + (when + (check-tok? "ident" nil) + (begin + (append! params (ocaml-tok-value (peek-tok))) + (advance-tok!) + (collect-params))))) + (collect-params) + (consume! "op" "=") + (let + ((rhs (parse-expr))) + (begin + (consume! "keyword" "in") + (let + ((body (parse-expr))) + (if + reccy + (list :let-rec name params rhs body) + (list :let name params rhs body))))))))))) + (define + parse-if + (fn + () + (let + ((cond-expr (parse-expr))) + (begin + (consume! "keyword" "then") + (let + ((then-expr (parse-expr))) + (cond + ((at-kw? "else") + (begin + (advance-tok!) + (let + ((else-expr (parse-expr))) + (list :if cond-expr then-expr else-expr)))) + (else (list :if cond-expr then-expr (list :unit))))))))) + (set! + parse-expr + (fn + () + (cond + ((at-kw? "fun") (begin (advance-tok!) (parse-fun))) + ((at-kw? "let") (begin (advance-tok!) (parse-let))) + ((at-kw? "if") (begin (advance-tok!) (parse-if))) + (else (parse-tuple))))) + (let + ((result (parse-expr))) + (begin + (when + (not (= (ocaml-tok-type (peek-tok)) "eof")) + (error + (str + "ocaml-parse: trailing tokens at idx " + idx + " — got " + (ocaml-tok-type (peek-tok)) + " " + (ocaml-tok-value (peek-tok))))) + result)))))) diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 2750fa16..3b2fae6b 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -29,7 +29,9 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 1) (load "lib/guest/lex.sx") (load "lib/guest/prefix.sx") +(load "lib/guest/pratt.sx") (load "lib/ocaml/tokenizer.sx") +(load "lib/ocaml/parser.sx") (load "lib/ocaml/tests/tokenize.sx") ;; ── empty / eof ──────────────────────────────────────────────── @@ -169,6 +171,95 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 195) (eval "(ocaml-test-tok-value \"x := !y\" 1)") +;; ── Phase 1.parse: parser ────────────────────────────────────── +;; Atoms +(epoch 200) +(eval "(ocaml-parse \"42\")") +(epoch 201) +(eval "(ocaml-parse \"3.14\")") +(epoch 202) +(eval "(ocaml-parse \"\\\"hi\\\"\")") +(epoch 203) +(eval "(ocaml-parse \"'a'\")") +(epoch 204) +(eval "(ocaml-parse \"true\")") +(epoch 205) +(eval "(ocaml-parse \"false\")") +(epoch 206) +(eval "(ocaml-parse \"x\")") +(epoch 207) +(eval "(ocaml-parse \"Some\")") +(epoch 208) +(eval "(ocaml-parse \"()\")") + +;; Application (left-assoc) +(epoch 210) +(eval "(ocaml-parse \"f x\")") +(epoch 211) +(eval "(ocaml-parse \"f x y\")") +(epoch 212) +(eval "(ocaml-parse \"f (g x)\")") +(epoch 213) +(eval "(ocaml-parse \"Some 42\")") + +;; Binops with precedence +(epoch 220) +(eval "(ocaml-parse \"1 + 2\")") +(epoch 221) +(eval "(ocaml-parse \"a + b * c\")") +(epoch 222) +(eval "(ocaml-parse \"a * b + c\")") +(epoch 223) +(eval "(ocaml-parse \"a && b || c\")") +(epoch 224) +(eval "(ocaml-parse \"a = b\")") +(epoch 225) +(eval "(ocaml-parse \"a ^ b ^ c\")") +(epoch 226) +(eval "(ocaml-parse \"a :: b :: []\")") +(epoch 227) +(eval "(ocaml-parse \"(a + b) * c\")") +(epoch 228) +(eval "(ocaml-parse \"a |> f |> g\")") +(epoch 229) +(eval "(ocaml-parse \"x mod 2\")") + +;; Prefix +(epoch 230) +(eval "(ocaml-parse \"-x\")") +(epoch 231) +(eval "(ocaml-parse \"-1 + 2\")") + +;; Tuples & lists +(epoch 240) +(eval "(ocaml-parse \"(1, 2, 3)\")") +(epoch 241) +(eval "(ocaml-parse \"[1; 2; 3]\")") +(epoch 242) +(eval "(ocaml-parse \"[]\")") + +;; if / fun / let / let rec +(epoch 250) +(eval "(ocaml-parse \"if x then 1 else 2\")") +(epoch 251) +(eval "(ocaml-parse \"if c then x\")") +(epoch 252) +(eval "(ocaml-parse \"fun x -> x + 1\")") +(epoch 253) +(eval "(ocaml-parse \"fun x y -> x + y\")") +(epoch 254) +(eval "(ocaml-parse \"let x = 1 in x\")") +(epoch 255) +(eval "(ocaml-parse \"let f x = x + 1 in f 2\")") +(epoch 256) +(eval "(ocaml-parse \"let rec f x = f x in f 1\")") +(epoch 257) +(eval "(ocaml-parse \"let f x y = x + y in f 1 2\")") + +;; begin/end +(epoch 260) +(eval "(ocaml-parse \"begin 1 + 2 end\")") + EPOCHS OUTPUT=$(timeout 60 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -278,9 +369,54 @@ check 193 "Some is ctor" '"ctor"' check 194 "first |> value" '"|>"' check 195 "ref assign :=" '":="' +# ── Parser tests ──────────────────────────────────────────────── +check 200 "parse int" '("int" 42)' +check 201 "parse float" '("float" 3.14)' +check 202 "parse string" '("string" "hi")' +check 203 "parse char" '("char" "a")' +check 204 "parse true" '("bool" true)' +check 205 "parse false" '("bool" false)' +check 206 "parse var" '("var" "x")' +check 207 "parse ctor" '("con" "Some")' +check 208 "parse unit" '("unit")' + +check 210 "parse f x" '("app" ("var" "f") ("var" "x"))' +check 211 "parse f x y left-assoc" '("app" ("app" ("var" "f") ("var" "x")) ("var" "y"))' +check 212 "parse f (g x)" '("app" ("var" "f") ("app" ("var" "g") ("var" "x")))' +check 213 "parse Some 42" '("app" ("con" "Some") ("int" 42))' + +check 220 "parse 1+2" '("op" "+" ("int" 1) ("int" 2))' +check 221 "parse a + b * c prec" '("op" "+" ("var" "a") ("op" "*"' +check 222 "parse a*b + c prec" '("op" "+" ("op" "*"' +check 223 "parse && / || prec" '("op" "||" ("op" "&&"' +check 224 "parse a = b" '("op" "=" ("var" "a") ("var" "b"))' +check 225 "parse ^ right-assoc" '("op" "^" ("var" "a") ("op" "^"' +check 226 "parse :: right-assoc" '("op" "::" ("var" "a") ("op" "::"' +check 227 "parse parens override" '("op" "*" ("op" "+"' +check 228 "parse |> chain" '("op" "|>" ("op" "|>"' +check 229 "parse mod kw-binop" '("op" "mod" ("var" "x") ("int" 2))' + +check 230 "parse -x" '("neg" ("var" "x"))' +check 231 "parse -1+2" '("op" "+" ("neg" ("int" 1)) ("int" 2))' + +check 240 "parse tuple" '("tuple" ("int" 1) ("int" 2) ("int" 3))' +check 241 "parse list literal" '("list" ("int" 1) ("int" 2) ("int" 3))' +check 242 "parse []" '("list")' + +check 250 "parse if/then/else" '("if" ("var" "x") ("int" 1) ("int" 2))' +check 251 "parse if w/o else" '("if" ("var" "c") ("var" "x") ("unit"))' +check 252 "parse fun x -> ..." '("fun" ("x") ("op" "+" ("var" "x") ("int" 1)))' +check 253 "parse fun x y ->" '("fun" ("x" "y")' +check 254 "parse let x = 1 in x" '("let" "x" () ("int" 1) ("var" "x"))' +check 255 "parse let f x =" '("let" "f" ("x") ("op" "+"' +check 256 "parse let rec f x =" '("let-rec" "f" ("x")' +check 257 "parse let f x y =" '("let" "f" ("x" "y")' + +check 260 "parse begin/end" '("op" "+" ("int" 1) ("int" 2))' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then - echo "ok $PASS/$TOTAL OCaml-on-SX tokenizer tests passed" + echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" else echo "FAIL $PASS/$TOTAL passed, $FAIL failed:" echo "" diff --git a/lib/ocaml/tests/tokenize.sx b/lib/ocaml/tests/tokenize.sx index cdf8955a..68235703 100644 --- a/lib/ocaml/tests/tokenize.sx +++ b/lib/ocaml/tests/tokenize.sx @@ -1,9 +1,8 @@ -;; lib/ocaml/tests/tokenize.sx — smoke tests for the OCaml tokenizer. +;; lib/ocaml/tests/tokenize.sx — smoke-test helpers. ;; ;; Tests are exercised via lib/ocaml/test.sh, which drives sx_server.exe -;; over the epoch protocol. This file provides a small evaluator that -;; returns short diagnostic values for each fixture so the runner can -;; grep them out of one batched run. +;; over the epoch protocol. This file provides small accessors so the +;; bash runner can grep short diagnostic values out of one batched run. (define ocaml-test-tok-type @@ -14,3 +13,9 @@ (fn (src i) (get (nth (ocaml-tokenize src) i) :value))) (define ocaml-test-tok-count (fn (src) (len (ocaml-tokenize src)))) + +(define ocaml-test-parse-str (fn (src) (ocaml-parse src))) + +(define + ocaml-test-parse-head + (fn (src) (nth (ocaml-parse src) 0))) diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index e16759aa..54bd98f2 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -125,12 +125,13 @@ SX CEK evaluator (both JS and OCaml hosts) int/float literals (incl. hex, exponent, underscores), nested block comments `(* ... *)`. _(labels `~label:` / `?label:` and heredoc `{|...|}` deferred — surface tokens already work via `~`/`?` punct + `{`/`|` punct.)_ -- [ ] **Parser:** top-level `let`/`let rec`/`type`/`module`/`exception`/`open`/`include` - declarations; expressions: literals, identifiers, constructor application, - lambda, application (left-assoc), binary ops with precedence table, - `if`/`then`/`else`, `match`/`with`, `try`/`with`, `let`/`in`, `begin`/`end`, - `fun`/`function`, tuples, list literals, record literals/updates, field access, - sequences `;`, unit `()`. +- [~] **Parser:** expressions: literals, identifiers, constructor application, + lambda, application (left-assoc), binary ops with precedence (29 ops via + `lib/guest/pratt.sx`), `if`/`then`/`else`, `let`/`in`, `let rec`, + `fun`/`->`, tuples, list literals, `begin`/`end`, unit `()`. _(Pending: + top-level `let`/`type`/`module`/`exception`/`open`/`include` decls, + `match`/`with`, `try`/`with`, `function`, record literals/updates, + field access, sequences `;`.)_ - [ ] **Patterns:** constructor, literal, variable, wildcard `_`, tuple, list cons `::`, list literal, record, `as`, or-pattern `P1 | P2`, `when` guard. - [ ] OCaml is **not** indentation-sensitive — no layout algorithm needed. @@ -309,6 +310,14 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-07 Phase 1 — `lib/ocaml/parser.sx` expression parser consuming + `lib/guest/pratt.sx` for binop precedence (29 operators across 8 levels, + incl. keyword-spelled binops `mod`/`land`/`lor`/`lxor`/`lsl`/`lsr`/`asr`). + Atoms (literals + var/con/unit/list), application (left-assoc), prefix + `-`/`not`, tuples, parens, `if`/`then`/`else`, `fun x y -> body`, + `let`/`let rec` with function shorthand. AST shapes match Haskell-on-SX + conventions (`(:int N)` `(:op OP L R)` `(:fun PARAMS BODY)` etc.). Total + 95/95 tests now passing via `lib/ocaml/test.sh`. - 2026-05-07 Phase 1 — `lib/ocaml/tokenizer.sx` consuming `lib/guest/lex.sx` via `prefix-rename`. Covers idents, ctors, 51 keywords, numbers (int / float / hex / exponent / underscored), strings (with escapes), chars (with escapes), From 9648dac88de9b33200fe48236609fb357c0b1634 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 07:25:11 +0000 Subject: [PATCH 003/298] ocaml: phase 1 top-level decls (+9 tests, 104 total) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ocaml-parse-program: program = decls + bare exprs, ;;-separated. Each decl is (:def …), (:def-rec …), or (:expr …). Body parsing re-feeds the source slice through ocaml-parse — shared-state refactor deferred. --- lib/ocaml/parser.sx | 124 +++++++++++++++++++++++++++++++++++++++++++ lib/ocaml/test.sh | 31 +++++++++++ plans/ocaml-on-sx.md | 15 ++++-- 3 files changed, 166 insertions(+), 4 deletions(-) diff --git a/lib/ocaml/parser.sx b/lib/ocaml/parser.sx index 5201d17c..59634387 100644 --- a/lib/ocaml/parser.sx +++ b/lib/ocaml/parser.sx @@ -416,3 +416,127 @@ " " (ocaml-tok-value (peek-tok))))) result)))))) + +(define + ocaml-parse-program + (fn + (src) + (let + ((tokens (ocaml-tokenize src)) (idx 0) (tok-len 0) (decls (list))) + (begin + (set! tok-len (len tokens)) + (define peek-tok (fn () (nth tokens idx))) + (define advance-tok! (fn () (set! idx (+ idx 1)))) + (define + check-tok? + (fn + (type value) + (let + ((t (peek-tok))) + (and + (= (ocaml-tok-type t) type) + (or (= value nil) (= (ocaml-tok-value t) value)))))) + (define + consume! + (fn + (type value) + (if + (check-tok? type value) + (let ((t (peek-tok))) (begin (advance-tok!) t)) + (error + (str + "ocaml-parse-program: expected " + type + " " + value + " got " + (ocaml-tok-type (peek-tok)) + " " + (ocaml-tok-value (peek-tok))))))) + (define at-kw? (fn (kw) (check-tok? "keyword" kw))) + (define at-op? (fn (op) (check-tok? "op" op))) + (define + skip-double-semi! + (fn + () + (when (at-op? ";;") (begin (advance-tok!) (skip-double-semi!))))) + (define + cur-pos + (fn + () + (let ((t (peek-tok))) (if (= t nil) (len src) (get t :pos))))) + (define + skip-to-boundary! + (fn + () + (cond + ((>= idx tok-len) nil) + ((= (ocaml-tok-type (peek-tok)) "eof") nil) + ((at-op? ";;") nil) + ((at-kw? "let") nil) + (else (begin (advance-tok!) (skip-to-boundary!)))))) + (define + parse-decl-let + (fn + () + (advance-tok!) + (let + ((reccy false)) + (begin + (when + (at-kw? "rec") + (begin (advance-tok!) (set! reccy true))) + (let + ((name (ocaml-tok-value (consume! "ident" nil))) + (params (list))) + (begin + (define + collect-params + (fn + () + (when + (check-tok? "ident" nil) + (begin + (append! params (ocaml-tok-value (peek-tok))) + (advance-tok!) + (collect-params))))) + (collect-params) + (consume! "op" "=") + (let + ((expr-start (cur-pos))) + (begin + (skip-to-boundary!) + (let + ((expr-src (slice src expr-start (cur-pos)))) + (let + ((expr (ocaml-parse expr-src))) + (if + reccy + (list :def-rec name params expr) + (list :def name params expr)))))))))))) + (define + parse-decl-expr + (fn + () + (let + ((expr-start (cur-pos))) + (begin + (skip-to-boundary!) + (let + ((expr-src (slice src expr-start (cur-pos)))) + (let ((expr (ocaml-parse expr-src))) (list :expr expr))))))) + (define + loop + (fn + () + (begin + (skip-double-semi!) + (when + (< idx tok-len) + (cond + ((= (ocaml-tok-type (peek-tok)) "eof") nil) + ((at-kw? "let") + (begin (append! decls (parse-decl-let)) (loop))) + (else (begin (append! decls (parse-decl-expr)) (loop)))))))) + (loop) + (cons :program decls))))) diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 3b2fae6b..cf55ac8a 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -260,6 +260,26 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 260) (eval "(ocaml-parse \"begin 1 + 2 end\")") +;; ── Top-level decls ──────────────────────────────────────────── +(epoch 270) +(eval "(ocaml-parse-program \"let x = 1\")") +(epoch 271) +(eval "(ocaml-parse-program \"let x = 1 ;;\")") +(epoch 272) +(eval "(ocaml-parse-program \"let f x = x + 1\")") +(epoch 273) +(eval "(ocaml-parse-program \"let rec fact n = if n = 0 then 1 else n * fact (n - 1)\")") +(epoch 274) +(eval "(ocaml-parse-program \"let x = 1 let y = 2\")") +(epoch 275) +(eval "(ocaml-parse-program \"1 + 2 ;;\")") +(epoch 276) +(eval "(ocaml-parse-program \"let x = 1 ;; let y = 2 ;; x + y\")") +(epoch 277) +(eval "(len (ocaml-parse-program \"let x = 1 ;; let y = 2 ;; x + y\"))") +(epoch 278) +(eval "(ocaml-parse-program \"\")") + EPOCHS OUTPUT=$(timeout 60 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -414,6 +434,17 @@ check 257 "parse let f x y =" '("let" "f" ("x" "y")' check 260 "parse begin/end" '("op" "+" ("int" 1) ("int" 2))' +# ── Top-level decls ───────────────────────────────────────────── +check 270 "program: let x = 1" '("program" ("def" "x" () ("int" 1)))' +check 271 "program: let x = 1 ;;" '("program" ("def" "x" () ("int" 1)))' +check 272 "program: let f x = x+1" '("program" ("def" "f" ("x") ("op" "+"' +check 273 "program: let rec fact" '("def-rec" "fact" ("n")' +check 274 "program: two decls" '("def" "x" () ("int" 1)) ("def" "y"' +check 275 "program: bare expr" '("program" ("expr" ("op" "+" ("int" 1) ("int" 2))))' +check 276 "program: mixed decls + expr" '("def" "y" () ("int" 2)) ("expr"' +check 277 "program: 4 forms incl head" '4' +check 278 "program: empty" '("program")' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 54bd98f2..7c1534e0 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -128,10 +128,11 @@ SX CEK evaluator (both JS and OCaml hosts) - [~] **Parser:** expressions: literals, identifiers, constructor application, lambda, application (left-assoc), binary ops with precedence (29 ops via `lib/guest/pratt.sx`), `if`/`then`/`else`, `let`/`in`, `let rec`, - `fun`/`->`, tuples, list literals, `begin`/`end`, unit `()`. _(Pending: - top-level `let`/`type`/`module`/`exception`/`open`/`include` decls, - `match`/`with`, `try`/`with`, `function`, record literals/updates, - field access, sequences `;`.)_ + `fun`/`->`, tuples, list literals, `begin`/`end`, unit `()`. Top-level + decls: `let [rec] name params* = expr` and bare expressions, `;;`-separated + via `ocaml-parse-program`. _(Pending: `type`/`module`/`exception`/`open`/ + `include` decls, `match`/`with`, `try`/`with`, `function`, record literals/ + updates, field access, sequences `;`, `and` mutually-recursive bindings.)_ - [ ] **Patterns:** constructor, literal, variable, wildcard `_`, tuple, list cons `::`, list literal, record, `as`, or-pattern `P1 | P2`, `when` guard. - [ ] OCaml is **not** indentation-sensitive — no layout algorithm needed. @@ -310,6 +311,12 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-07 Phase 1 — top-level program parser `ocaml-parse-program`. Parses + a sequence of `let [rec] name params* = expr` decls and bare expressions + separated by `;;`. Output `(:program DECLS)` with each decl one of `(:def …)`, + `(:def-rec …)`, `(:expr E)`. Decl bodies parsed by re-feeding the source + slice through `ocaml-parse` (cheap stand-in until shared-state refactor). + 104/104 tests now passing (+9). - 2026-05-07 Phase 1 — `lib/ocaml/parser.sx` expression parser consuming `lib/guest/pratt.sx` for binop precedence (29 operators across 8 levels, incl. keyword-spelled binops `mod`/`land`/`lor`/`lxor`/`lsl`/`lsr`/`asr`). From 9102e57d893bf13051c02079ebae05b777c21a16 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 07:29:02 +0000 Subject: [PATCH 004/298] ocaml: phase 1 match/with + pattern parser (+9 tests, 113 total) Patterns: wildcard, literal, var, ctor (nullary + arg, flattens tuple args so Pair(a,b) -> (:pcon "Pair" PA PB)), tuple, list literal, cons :: (right-assoc), unit. Match: leading | optional, (:match SCRUT CLAUSES) with each clause (:case PAT BODY). Body parsed via parse-expr because | is below level-1 binop precedence. --- lib/ocaml/parser.sx | 215 +++++++++++++++++++++++++++++++++++++++---- lib/ocaml/test.sh | 31 +++++++ plans/ocaml-on-sx.md | 15 ++- 3 files changed, 241 insertions(+), 20 deletions(-) diff --git a/lib/ocaml/parser.sx b/lib/ocaml/parser.sx index 59634387..d1a56dff 100644 --- a/lib/ocaml/parser.sx +++ b/lib/ocaml/parser.sx @@ -5,39 +5,44 @@ ;; keywords serialize to their string name so `(list :var "x")` is the ;; same value as `(list "var" "x")` at runtime. ;; -;; Scope (this iteration — expressions only): -;; atoms int/float/string/char, true/false, unit (), var, con, list literal +;; Expression scope: +;; atoms int/float/string/char/bool, unit (), var, con, list literal ;; application left-associative, f x y z ;; prefix -E unary minus, not E -;; infix standard ops via lib/guest/pratt.sx table +;; infix 29 ops via lib/guest/pratt.sx ;; tuple a, b, c (lower than infix, higher than let/if) ;; parens (e) ;; if if c then t else e (else optional → unit) ;; fun fun x y -> body -;; let let x = e in body (no rec) -;; let f x y = e in body (function shorthand) +;; let let x = e in body (no rec, function shorthand) ;; let rec f x = e in body +;; match match e with [|] p -> body | p -> body | ... +;; +;; Pattern scope: +;; _ (wildcard), int/string/char/bool literals, ident (var binding), +;; ctor (no args), ctor pat, (), parens, tuple (pat,pat,…), +;; list literal [pat;pat;…], cons p1 :: p2. ;; ;; AST shapes: ;; (:int N) (:float N) (:string S) (:char C) (:bool B) (:unit) ;; (:var NAME) (:con NAME) -;; (:app FN ARG) — binary, chain for multi-arg -;; (:op OP LHS RHS) — binary infix; OP is the source string +;; (:app FN ARG) +;; (:op OP LHS RHS) ;; (:neg E) (:not E) -;; (:tuple ITEMS) -;; (:list ITEMS) +;; (:tuple ITEMS) (:list ITEMS) ;; (:if C T E) -;; (:fun PARAMS BODY) — PARAMS list of strings (idents) -;; (:let NAME PARAMS EXPR BODY) -;; (:let-rec NAME PARAMS EXPR BODY) +;; (:fun PARAMS BODY) +;; (:let NAME PARAMS EXPR BODY) (:let-rec NAME PARAMS EXPR BODY) +;; (:match SCRUTINEE CLAUSES) CLAUSES = ((:case PAT BODY) ...) +;; +;; (:pwild) (:pvar N) (:plit LIT) +;; (:pcon NAME ARG-PATS) — ARG-PATS empty for nullary +;; (:ptuple PATS) (:plist PATS) (:pcons HEAD TAIL) (define ocaml-tok-type (fn (t) (if (= t nil) "eof" (get t :type)))) (define ocaml-tok-value (fn (t) (if (= t nil) nil (get t :value)))) -;; Standard OCaml binary operator table. -;; Higher precedence = tighter binding. -;; ASSOC is :left or :right. (define ocaml-op-table (list @@ -87,8 +92,6 @@ ((entry (pratt-op-lookup ocaml-op-table op))) (and (not (= entry nil)) (= (pratt-op-assoc entry) :right))))) -;; Some OCaml binops are spelled with keyword tokens (mod / land / lor / -;; lxor / lsl / lsr / asr / or). Recognise both shapes. (define ocaml-tok-is-binop? (fn @@ -138,6 +141,144 @@ (ocaml-tok-value (peek-tok))))))) (define at-kw? (fn (kw) (check-tok? "keyword" kw))) (define at-op? (fn (op) (check-tok? "op" op))) + (define parse-pattern (fn () nil)) + (define parse-pattern-cons (fn () nil)) + (define parse-pattern-app (fn () nil)) + (define parse-pattern-atom (fn () nil)) + (define + at-pattern-atom? + (fn + () + (let + ((tt (ocaml-tok-type (peek-tok))) + (tv (ocaml-tok-value (peek-tok)))) + (cond + ((= tt "number") true) + ((= tt "string") true) + ((= tt "char") true) + ((= tt "ident") true) + ((= tt "ctor") true) + ((and (= tt "keyword") (or (= tv "true") (= tv "false"))) + true) + ((and (= tt "op") (or (= tv "(") (= tv "["))) true) + (else false))))) + (set! + parse-pattern-atom + (fn + () + (let + ((tt (ocaml-tok-type (peek-tok))) + (tv (ocaml-tok-value (peek-tok)))) + (cond + ((= tt "number") + (begin + (advance-tok!) + (if + (= (round tv) tv) + (list :plit (list :int tv)) + (list :plit (list :float tv))))) + ((= tt "string") + (begin (advance-tok!) (list :plit (list :string tv)))) + ((= tt "char") + (begin (advance-tok!) (list :plit (list :char tv)))) + ((and (= tt "keyword") (= tv "true")) + (begin (advance-tok!) (list :plit (list :bool true)))) + ((and (= tt "keyword") (= tv "false")) + (begin (advance-tok!) (list :plit (list :bool false)))) + ((and (= tt "ident") (= tv "_")) + (begin (advance-tok!) (list :pwild))) + ((= tt "ident") (begin (advance-tok!) (list :pvar tv))) + ((= tt "ctor") (begin (advance-tok!) (list :pcon tv))) + ((and (= tt "op") (= tv "(")) + (begin + (advance-tok!) + (cond + ((at-op? ")") + (begin (advance-tok!) (list :plit (list :unit)))) + (else + (let + ((first (parse-pattern))) + (cond + ((at-op? ",") + (let + ((items (list first))) + (begin + (define + loop + (fn + () + (when + (at-op? ",") + (begin + (advance-tok!) + (append! items (parse-pattern)) + (loop))))) + (loop) + (consume! "op" ")") + (cons :ptuple items)))) + (else (begin (consume! "op" ")") first)))))))) + ((and (= tt "op") (= tv "[")) + (begin + (advance-tok!) + (cond + ((at-op? "]") (begin (advance-tok!) (list :plist))) + (else + (let + ((items (list))) + (begin + (append! items (parse-pattern)) + (define + loop + (fn + () + (when + (at-op? ";") + (begin + (advance-tok!) + (when + (not (at-op? "]")) + (begin + (append! items (parse-pattern)) + (loop))))))) + (loop) + (consume! "op" "]") + (cons :plist items))))))) + (else + (error + (str + "ocaml-parse: unexpected pattern token " + tt + " " + tv + " at idx " + idx))))))) + (set! + parse-pattern-app + (fn + () + (let + ((head (parse-pattern-atom))) + (cond + ((and (= (nth head 0) :pcon) (at-pattern-atom?)) + (let + ((arg (parse-pattern-atom))) + (let + ((args (cond ((= (nth arg 0) :ptuple) (rest arg)) (else (list arg))))) + (concat (list :pcon (nth head 1)) args)))) + (else head))))) + (set! + parse-pattern-cons + (fn + () + (let + ((lhs (parse-pattern-app))) + (cond + ((at-op? "::") + (begin + (advance-tok!) + (list :pcons lhs (parse-pattern-cons)))) + (else lhs))))) + (set! parse-pattern (fn () (parse-pattern-cons))) (define parse-expr (fn () nil)) (define parse-tuple (fn () nil)) (define parse-binop-rhs (fn (lhs min-prec) lhs)) @@ -393,6 +534,40 @@ ((else-expr (parse-expr))) (list :if cond-expr then-expr else-expr)))) (else (list :if cond-expr then-expr (list :unit))))))))) + (define + parse-match + (fn + () + (let + ((scrut (parse-expr))) + (begin + (consume! "keyword" "with") + (when (at-op? "|") (advance-tok!)) + (let + ((cases (list))) + (begin + (define + one + (fn + () + (let + ((p (parse-pattern))) + (begin + (consume! "op" "->") + (let + ((body (parse-match-body))) + (append! cases (list :case p body))))))) + (one) + (define + loop + (fn + () + (when + (at-op? "|") + (begin (advance-tok!) (one) (loop))))) + (loop) + (cons :match (cons scrut (list cases))))))))) + (define parse-match-body (fn () (parse-expr))) (set! parse-expr (fn @@ -401,6 +576,7 @@ ((at-kw? "fun") (begin (advance-tok!) (parse-fun))) ((at-kw? "let") (begin (advance-tok!) (parse-let))) ((at-kw? "if") (begin (advance-tok!) (parse-if))) + ((at-kw? "match") (begin (advance-tok!) (parse-match))) (else (parse-tuple))))) (let ((result (parse-expr))) @@ -422,7 +598,10 @@ (fn (src) (let - ((tokens (ocaml-tokenize src)) (idx 0) (tok-len 0) (decls (list))) + ((tokens (ocaml-tokenize src)) + (idx 0) + (tok-len 0) + (decls (list))) (begin (set! tok-len (len tokens)) (define peek-tok (fn () (nth tokens idx))) diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index cf55ac8a..bb6bd860 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -280,6 +280,26 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 278) (eval "(ocaml-parse-program \"\")") +;; ── Match / patterns ─────────────────────────────────────────── +(epoch 300) +(eval "(ocaml-parse \"match x with | None -> 0 | Some y -> y\")") +(epoch 301) +(eval "(ocaml-parse \"match x with None -> 0 | Some y -> y\")") +(epoch 302) +(eval "(ocaml-parse \"match l with | [] -> 0 | h :: t -> 1\")") +(epoch 303) +(eval "(ocaml-parse \"match p with | (a, b) -> a + b\")") +(epoch 304) +(eval "(ocaml-parse \"match n with | 0 -> 1 | _ -> n\")") +(epoch 305) +(eval "(ocaml-parse \"match x with | true -> 1 | false -> 0\")") +(epoch 306) +(eval "(ocaml-parse \"match x with | Pair (a, b) -> a + b\")") +(epoch 307) +(eval "(ocaml-parse \"match x with | \\\"hi\\\" -> 1 | _ -> 0\")") +(epoch 308) +(eval "(ocaml-parse \"match x with | () -> 0\")") + EPOCHS OUTPUT=$(timeout 60 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -445,6 +465,17 @@ check 276 "program: mixed decls + expr" '("def" "y" () ("int" 2)) ("expr"' check 277 "program: 4 forms incl head" '4' check 278 "program: empty" '("program")' +# ── Match / patterns ──────────────────────────────────────────── +check 300 "match Some/None" '("match" ("var" "x") (("case" ("pcon" "None") ("int" 0)) ("case" ("pcon" "Some" ("pvar" "y")) ("var" "y")))' +check 301 "match no leading bar" '("match" ("var" "x") (("case" ("pcon" "None") ("int" 0)) ("case" ("pcon" "Some"' +check 302 "match list cons" '("case" ("plist") ("int" 0)) ("case" ("pcons" ("pvar" "h") ("pvar" "t")) ("int" 1))' +check 303 "match tuple pat" '("ptuple" ("pvar" "a") ("pvar" "b"))' +check 304 "match int + wildcard" '("case" ("plit" ("int" 0)) ("int" 1)) ("case" ("pwild")' +check 305 "match bool literals" '("plit" ("bool" true))' +check 306 "match ctor with tuple arg" '("pcon" "Pair" ("pvar" "a") ("pvar" "b"))' +check 307 "match string literal" '("plit" ("string" "hi"))' +check 308 "match unit pattern" '("plit" ("unit"))' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 7c1534e0..7294cd83 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -133,8 +133,11 @@ SX CEK evaluator (both JS and OCaml hosts) via `ocaml-parse-program`. _(Pending: `type`/`module`/`exception`/`open`/ `include` decls, `match`/`with`, `try`/`with`, `function`, record literals/ updates, field access, sequences `;`, `and` mutually-recursive bindings.)_ -- [ ] **Patterns:** constructor, literal, variable, wildcard `_`, tuple, list cons `::`, - list literal, record, `as`, or-pattern `P1 | P2`, `when` guard. +- [~] **Patterns:** constructor (nullary + with args, incl. flattened tuple + args `Pair (a, b)` → `(:pcon "Pair" PA PB)`), literal (int/string/char/ + bool/unit), variable, wildcard `_`, tuple, list cons `::`, list literal. + _(Pending: record patterns, `as` binding, or-pattern `P1 | P2`, `when` + guard.)_ - [ ] OCaml is **not** indentation-sensitive — no layout algorithm needed. - [ ] Tests in `lib/ocaml/tests/parse.sx` — 50+ round-trip parse tests. @@ -311,6 +314,14 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-07 Phase 1 — `match`/`with` + pattern parser. Patterns: wildcard, + literal, var, ctor (nullary + with arg, with tuple-arg flattening so + `Pair (a, b)` → `(:pcon "Pair" PA PB)`), tuple, list literal, cons `::` + (right-assoc), parens, unit. Match clauses: leading `|` optional, body + parsed via `parse-expr`. AST: `(:match SCRUT CLAUSES)` where each clause + is `(:case PAT BODY)`. 113/113 tests passing (+9). Note: parse-expr is + used for case bodies, so a trailing `| pat -> body` after a complex body + will be reached because `|` is not in the binop table for level 1. - 2026-05-07 Phase 1 — top-level program parser `ocaml-parse-program`. Parses a sequence of `let [rec] name params* = expr` decls and bare expressions separated by `;;`. Output `(:program DECLS)` with each decl one of `(:def …)`, From a6ab944c39ebbffeac563fcde9cc04b69ec838f8 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 07:48:52 +0000 Subject: [PATCH 005/298] ocaml: phase 1 sequence operator ; (+10 tests, 123 total) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two-phase grammar: parse-expr-no-seq (prior entry) + parse-expr wraps it with ;-chaining. List bodies keep parse-expr-no-seq so ; remains a separator inside [...]. Match clause bodies use the seq variant and stop at | — real OCaml semantics. Trailing ; before end/)/|/in/then/else/eof permitted. --- lib/ocaml/parser.sx | 54 ++++++++++++++++++++++++++++++++++++-------- lib/ocaml/test.sh | 34 ++++++++++++++++++++++++++++ plans/ocaml-on-sx.md | 19 ++++++++++++---- 3 files changed, 93 insertions(+), 14 deletions(-) diff --git a/lib/ocaml/parser.sx b/lib/ocaml/parser.sx index d1a56dff..6c48ee9b 100644 --- a/lib/ocaml/parser.sx +++ b/lib/ocaml/parser.sx @@ -17,6 +17,7 @@ ;; let let x = e in body (no rec, function shorthand) ;; let rec f x = e in body ;; match match e with [|] p -> body | p -> body | ... +;; sequence e1 ; e2 → (:seq e1 e2 …) (lowest-precedence binary) ;; ;; Pattern scope: ;; _ (wildcard), int/string/char/bool literals, ident (var binding), @@ -30,6 +31,7 @@ ;; (:op OP LHS RHS) ;; (:neg E) (:not E) ;; (:tuple ITEMS) (:list ITEMS) +;; (:seq EXPRS) ;; (:if C T E) ;; (:fun PARAMS BODY) ;; (:let NAME PARAMS EXPR BODY) (:let-rec NAME PARAMS EXPR BODY) @@ -280,6 +282,7 @@ (else lhs))))) (set! parse-pattern (fn () (parse-pattern-cons))) (define parse-expr (fn () nil)) + (define parse-expr-no-seq (fn () nil)) (define parse-tuple (fn () nil)) (define parse-binop-rhs (fn (lhs min-prec) lhs)) (define parse-prefix (fn () nil)) @@ -324,7 +327,7 @@ (let ((items (list))) (begin - (append! items (parse-expr)) + (append! items (parse-expr-no-seq)) (define loop (fn @@ -336,7 +339,7 @@ (when (not (at-op? "]")) (begin - (append! items (parse-expr)) + (append! items (parse-expr-no-seq)) (loop))))))) (loop) (consume! "op" "]") @@ -521,17 +524,17 @@ (fn () (let - ((cond-expr (parse-expr))) + ((cond-expr (parse-expr-no-seq))) (begin (consume! "keyword" "then") (let - ((then-expr (parse-expr))) + ((then-expr (parse-expr-no-seq))) (cond ((at-kw? "else") (begin (advance-tok!) (let - ((else-expr (parse-expr))) + ((else-expr (parse-expr-no-seq))) (list :if cond-expr then-expr else-expr)))) (else (list :if cond-expr then-expr (list :unit))))))))) (define @@ -539,7 +542,7 @@ (fn () (let - ((scrut (parse-expr))) + ((scrut (parse-expr-no-seq))) (begin (consume! "keyword" "with") (when (at-op? "|") (advance-tok!)) @@ -555,7 +558,7 @@ (begin (consume! "op" "->") (let - ((body (parse-match-body))) + ((body (parse-expr))) (append! cases (list :case p body))))))) (one) (define @@ -567,9 +570,8 @@ (begin (advance-tok!) (one) (loop))))) (loop) (cons :match (cons scrut (list cases))))))))) - (define parse-match-body (fn () (parse-expr))) (set! - parse-expr + parse-expr-no-seq (fn () (cond @@ -578,6 +580,40 @@ ((at-kw? "if") (begin (advance-tok!) (parse-if))) ((at-kw? "match") (begin (advance-tok!) (parse-match))) (else (parse-tuple))))) + (set! + parse-expr + (fn + () + (let + ((lhs (parse-expr-no-seq))) + (cond + ((at-op? ";") + (let + ((items (list lhs))) + (begin + (define + loop + (fn + () + (when + (at-op? ";") + (begin + (advance-tok!) + (cond + ((at-kw? "end") nil) + ((at-op? ")") nil) + ((at-op? "|") nil) + ((at-kw? "in") nil) + ((at-kw? "then") nil) + ((at-kw? "else") nil) + ((= (ocaml-tok-type (peek-tok)) "eof") nil) + (else + (begin + (append! items (parse-expr-no-seq)) + (loop)))))))) + (loop) + (cons :seq items)))) + (else lhs))))) (let ((result (parse-expr))) (begin diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index bb6bd860..b6e0821d 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -300,6 +300,28 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 308) (eval "(ocaml-parse \"match x with | () -> 0\")") +;; ── Sequences (;) ────────────────────────────────────────────── +(epoch 320) +(eval "(ocaml-parse \"1; 2\")") +(epoch 321) +(eval "(ocaml-parse \"1; 2; 3\")") +(epoch 322) +(eval "(ocaml-parse \"(1; 2)\")") +(epoch 323) +(eval "(ocaml-parse \"begin a; b; c end\")") +(epoch 324) +(eval "(ocaml-parse \"let x = 1 in x; x\")") +(epoch 325) +(eval "(ocaml-parse \"if c then (a; b) else c\")") +(epoch 326) +(eval "(ocaml-parse \"[1; 2; 3]\")") +(epoch 327) +(eval "(ocaml-parse \"1; 2;\")") +(epoch 328) +(eval "(ocaml-parse \"begin a; end\")") +(epoch 329) +(eval "(ocaml-parse \"match x with | _ -> a; b\")") + EPOCHS OUTPUT=$(timeout 60 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -476,6 +498,18 @@ check 306 "match ctor with tuple arg" '("pcon" "Pair" ("pvar" "a") ("pvar" "b check 307 "match string literal" '("plit" ("string" "hi"))' check 308 "match unit pattern" '("plit" ("unit"))' +# ── Sequences ─────────────────────────────────────────────────── +check 320 "seq 1;2" '("seq" ("int" 1) ("int" 2))' +check 321 "seq 1;2;3" '("seq" ("int" 1) ("int" 2) ("int" 3))' +check 322 "seq in parens" '("seq" ("int" 1) ("int" 2))' +check 323 "seq in begin/end" '("seq" ("var" "a") ("var" "b") ("var" "c"))' +check 324 "let body absorbs seq" '("let" "x" () ("int" 1) ("seq" ("var" "x") ("var" "x")))' +check 325 "if-branch parens for seq" '("if" ("var" "c") ("seq" ("var" "a") ("var" "b"))' +check 326 "list ; is separator" '("list" ("int" 1) ("int" 2) ("int" 3))' +check 327 "trailing ; OK" '("seq" ("int" 1) ("int" 2))' +check 328 "begin a; end singleton seq" '("seq" ("var" "a"))' +check 329 "match clause body absorbs ;" '("case" ("pwild") ("seq" ("var" "a") ("var" "b")))' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 7294cd83..04c61365 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -128,11 +128,12 @@ SX CEK evaluator (both JS and OCaml hosts) - [~] **Parser:** expressions: literals, identifiers, constructor application, lambda, application (left-assoc), binary ops with precedence (29 ops via `lib/guest/pratt.sx`), `if`/`then`/`else`, `let`/`in`, `let rec`, - `fun`/`->`, tuples, list literals, `begin`/`end`, unit `()`. Top-level - decls: `let [rec] name params* = expr` and bare expressions, `;;`-separated - via `ocaml-parse-program`. _(Pending: `type`/`module`/`exception`/`open`/ - `include` decls, `match`/`with`, `try`/`with`, `function`, record literals/ - updates, field access, sequences `;`, `and` mutually-recursive bindings.)_ + `fun`/`->`, `match`/`with`, tuples, list literals, sequences `;`, + `begin`/`end`, unit `()`. Top-level decls: `let [rec] name params* = expr` + and bare expressions, `;;`-separated via `ocaml-parse-program`. _(Pending: + `type`/`module`/`exception`/`open`/`include` decls, `try`/`with`, + `function`, record literals/updates, field access, `and` mutually-recursive + bindings.)_ - [~] **Patterns:** constructor (nullary + with args, incl. flattened tuple args `Pair (a, b)` → `(:pcon "Pair" PA PB)`), literal (int/string/char/ bool/unit), variable, wildcard `_`, tuple, list cons `::`, list literal. @@ -314,6 +315,14 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-07 Phase 1 — sequence operator `;`. Lowest-precedence binary; + `e1; e2; e3` → `(:seq e1 e2 e3)`. Two-phase grammar: `parse-expr-no-seq` + is the prior expression entry point; new `parse-expr` wraps it with + `;` chaining. List-literal items still use `parse-expr-no-seq` so `;` + retains its separator role inside `[…]`. Match-clause bodies use the + seq variant and stop at `|`, matching real OCaml semantics. Trailing `;` + before `end`/`)`/`|`/`in`/`then`/`else`/eof is permitted. 123/123 tests + passing (+10). - 2026-05-07 Phase 1 — `match`/`with` + pattern parser. Patterns: wildcard, literal, var, ctor (nullary + with arg, with tuple-arg flattening so `Pair (a, b)` → `(:pcon "Pair" PA PB)`), tuple, list literal, cons `::` From 4dca583ee36069dc40d08f70085f548745ec4f4e Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 07:57:20 +0000 Subject: [PATCH 006/298] ocaml: phase 2 evaluator slice (+42 tests, 165 total) ocaml-eval walks the AST and yields SX values. ocaml-run / ocaml-run-program wrap parse + eval. Coverage: atoms, vars, app (curried), 22 binary ops, prefix - and not, if/seq/tuple/list, fun (auto-curried via host SX lambdas), let, let-rec (mutable-cell knot for recursive functions). Initial env: not/succ/pred/abs/max/min/fst/snd/ignore. Tests: arithmetic, comparison, string concat, closures, fact 5 / fib 10 / sum 100, top-level decls, |> pipe. --- lib/ocaml/eval.sx | 244 +++++++++++++++++++++++++++++++++++++++++++ lib/ocaml/test.sh | 173 ++++++++++++++++++++++++++++++ plans/ocaml-on-sx.md | 26 +++-- 3 files changed, 436 insertions(+), 7 deletions(-) create mode 100644 lib/ocaml/eval.sx diff --git a/lib/ocaml/eval.sx b/lib/ocaml/eval.sx new file mode 100644 index 00000000..8fcb3d16 --- /dev/null +++ b/lib/ocaml/eval.sx @@ -0,0 +1,244 @@ +;; lib/ocaml/eval.sx — OCaml AST evaluator (Phase 2 slice). +;; +;; Walks the AST produced by ocaml-parse / ocaml-parse-program and yields +;; SX values. +;; +;; Coverage in this slice: +;; atoms int/float/string/char/bool/unit +;; :var env lookup +;; :app curried application +;; :op arithmetic, comparison, boolean, ^ string concat, mod, :: +;; :neg unary minus +;; :not boolean negation +;; :if conditional +;; :seq sequence — discard all but last +;; :tuple SX (:tuple v1 v2 …) +;; :list SX list +;; :fun closure (auto-curried via host SX lambda) +;; :let non-recursive binding +;; :let-rec recursive binding for function values (mutable ref cell) +;; +;; Out of scope: pattern matching, refs (`ref`/`!`/`:=`), modules, ADTs, +;; mutable records, for/while, try/with. +;; +;; Environment representation: an assoc list of (name value) pairs. Most +;; recent binding shadows older ones. + +;; Initial environment provides OCaml stdlib functions that are values, +;; not language keywords (e.g. `not`, `succ`, `pred`). Phase 6 adds the +;; full stdlib slice; this just unblocks Phase 2 tests. +(define ocaml-empty-env + (fn () + (list + (list "not" (fn (x) (not x))) + (list "succ" (fn (x) (+ x 1))) + (list "pred" (fn (x) (- x 1))) + (list "abs" (fn (x) (if (< x 0) (- 0 x) x))) + (list "max" (fn (a) (fn (b) (if (> a b) a b)))) + (list "min" (fn (a) (fn (b) (if (< a b) a b)))) + (list "fst" (fn (p) (nth p 1))) + (list "snd" (fn (p) (nth p 2))) + (list "ignore" (fn (x) nil))))) + +(define ocaml-env-lookup + (fn (env name) + (cond + ((= env (list)) nil) + ((= (first (first env)) name) (nth (first env) 1)) + (else (ocaml-env-lookup (rest env) name))))) + +(define ocaml-env-has? + (fn (env name) + (cond + ((= env (list)) false) + ((= (first (first env)) name) true) + (else (ocaml-env-has? (rest env) name))))) + +(define ocaml-env-extend + (fn (env name val) + (cons (list name val) env))) + +(define ocaml-tag-of (fn (ast) (nth ast 0))) + +(define ocaml-eval (fn (ast env) nil)) + +;; Auto-curry: (:fun ("x" "y" "z") body) → (fn (x) (fn (y) (fn (z) body))). +;; A zero-param lambda evaluates the body immediately on first call — +;; OCaml does not have nullary functions; `()`-taking functions still +;; receive the unit argument via a one-param lambda. +(define ocaml-make-curried + (fn (params body env) + (cond + ((= (len params) 0) + (ocaml-eval body env)) + ((= (len params) 1) + (fn (arg) + (ocaml-eval body + (ocaml-env-extend env (nth params 0) arg)))) + (else + (fn (arg) + (ocaml-make-curried + (rest params) + body + (ocaml-env-extend env (nth params 0) arg))))))) + +(define ocaml-eval-op + (fn (op lhs rhs) + (cond + ((= op "+") (+ lhs rhs)) + ((= op "-") (- lhs rhs)) + ((= op "*") (* lhs rhs)) + ((= op "/") (/ lhs rhs)) + ((= op "mod") (mod lhs rhs)) + ((= op "%") (mod lhs rhs)) + ((= op "**") (pow lhs rhs)) + ((= op "^") (str lhs rhs)) + ((= op "@") (concat lhs rhs)) + ((= op "::") (cons lhs rhs)) + ((= op "=") (= lhs rhs)) + ((= op "<>") (not (= lhs rhs))) + ((= op "==") (= lhs rhs)) + ((= op "!=") (not (= lhs rhs))) + ((= op "<") (< lhs rhs)) + ((= op ">") (> lhs rhs)) + ((= op "<=") (<= lhs rhs)) + ((= op ">=") (>= lhs rhs)) + ((= op "&&") (and lhs rhs)) + ((= op "||") (or lhs rhs)) + ((= op "or") (or lhs rhs)) + ((= op "|>") (rhs lhs)) + (else (error (str "ocaml-eval: unknown operator " op)))))) + +(set! ocaml-eval + (fn (ast env) + (let ((tag (ocaml-tag-of ast))) + (cond + ((= tag "int") (nth ast 1)) + ((= tag "float") (nth ast 1)) + ((= tag "string") (nth ast 1)) + ((= tag "char") (nth ast 1)) + ((= tag "bool") (nth ast 1)) + ((= tag "unit") nil) + ((= tag "var") + (let ((name (nth ast 1))) + (cond + ((ocaml-env-has? env name) (ocaml-env-lookup env name)) + (else (error (str "ocaml-eval: unbound variable " name)))))) + ((= tag "neg") (- 0 (ocaml-eval (nth ast 1) env))) + ((= tag "not") (not (ocaml-eval (nth ast 1) env))) + ((= tag "op") + (ocaml-eval-op + (nth ast 1) + (ocaml-eval (nth ast 2) env) + (ocaml-eval (nth ast 3) env))) + ((= tag "if") + (if (ocaml-eval (nth ast 1) env) + (ocaml-eval (nth ast 2) env) + (ocaml-eval (nth ast 3) env))) + ((= tag "seq") + (let ((items (rest ast)) (last nil)) + (begin + (define loop + (fn (xs) + (when (not (= xs (list))) + (begin + (set! last (ocaml-eval (first xs) env)) + (loop (rest xs)))))) + (loop items) + last))) + ((= tag "tuple") + (cons :tuple + (map (fn (e) (ocaml-eval e env)) (rest ast)))) + ((= tag "list") + (map (fn (e) (ocaml-eval e env)) (rest ast))) + ((= tag "fun") + (ocaml-make-curried (nth ast 1) (nth ast 2) env)) + ((= tag "app") + (let ((fn-val (ocaml-eval (nth ast 1) env)) + (arg-val (ocaml-eval (nth ast 2) env))) + (fn-val arg-val))) + ((= tag "let") + (let ((name (nth ast 1)) (params (nth ast 2)) + (rhs (nth ast 3)) (body (nth ast 4))) + (let ((rhs-val + (if (= (len params) 0) + (ocaml-eval rhs env) + (ocaml-make-curried params rhs env)))) + (ocaml-eval body (ocaml-env-extend env name rhs-val))))) + ((= tag "let-rec") + ;; For function bindings: tie the knot via a mutable cell. The + ;; placeholder closure that's bound first dereferences the cell + ;; on each call, so the function can call itself once the cell + ;; is set to the real closure. + (let ((name (nth ast 1)) (params (nth ast 2)) + (rhs (nth ast 3)) (body (nth ast 4))) + (cond + ((= (len params) 0) + ;; Non-functional let-rec — OCaml only allows this when the + ;; rhs is "syntactically a function or constructor". For the + ;; common case of a value, evaluate non-recursively. + (let ((rhs-val (ocaml-eval rhs env))) + (ocaml-eval body (ocaml-env-extend env name rhs-val)))) + (else + ;; Use a one-element list as a mutable cell to tie the + ;; recursive knot. The placeholder closure dereferences + ;; the cell on each call. + (let ((cell (list nil))) + (let ((env2 (ocaml-env-extend env name + (fn (arg) ((nth cell 0) arg))))) + (let ((rhs-val (ocaml-make-curried params rhs env2))) + (begin + (set-nth! cell 0 rhs-val) + (ocaml-eval body env2))))))))) + (else (error + (str "ocaml-eval: unknown AST tag " tag))))))) + +;; ocaml-run — convenience wrapper: parse + eval. +(define ocaml-run + (fn (src) + (ocaml-eval (ocaml-parse src) (ocaml-empty-env)))) + +;; ocaml-run-program — evaluate a program (sequence of decls + bare exprs). +;; Threads an env through decls; returns the value of the last form. +(define ocaml-run-program + (fn (src) + (let ((prog (ocaml-parse-program src)) (env (ocaml-empty-env)) (last nil)) + (begin + (define run-decl + (fn (decl) + (let ((tag (ocaml-tag-of decl))) + (cond + ((= tag "def") + (let ((name (nth decl 1)) (params (nth decl 2)) (rhs (nth decl 3))) + (let ((v (if (= (len params) 0) + (ocaml-eval rhs env) + (ocaml-make-curried params rhs env)))) + (begin + (set! env (ocaml-env-extend env name v)) + (set! last v))))) + ((= tag "def-rec") + (let ((name (nth decl 1)) (params (nth decl 2)) (rhs (nth decl 3))) + (cond + ((= (len params) 0) + (let ((v (ocaml-eval rhs env))) + (begin + (set! env (ocaml-env-extend env name v)) + (set! last v)))) + (else + (let ((cell (list nil))) + (let ((env2 (ocaml-env-extend env name + (fn (arg) ((nth cell 0) arg))))) + (let ((v (ocaml-make-curried params rhs env2))) + (begin + (set-nth! cell 0 v) + (set! env env2) + (set! last v))))))))) + ((= tag "expr") + (set! last (ocaml-eval (nth decl 1) env))) + (else (error (str "ocaml-run-program: bad decl " tag))))))) + (define loop + (fn (xs) + (when (not (= xs (list))) + (begin (run-decl (first xs)) (loop (rest xs)))))) + (loop (rest prog)) + last)))) diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index b6e0821d..83dc82a6 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -32,6 +32,7 @@ cat > "$TMPFILE" << 'EPOCHS' (load "lib/guest/pratt.sx") (load "lib/ocaml/tokenizer.sx") (load "lib/ocaml/parser.sx") +(load "lib/ocaml/eval.sx") (load "lib/ocaml/tests/tokenize.sx") ;; ── empty / eof ──────────────────────────────────────────────── @@ -322,6 +323,113 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 329) (eval "(ocaml-parse \"match x with | _ -> a; b\")") +;; ── Phase 2: evaluator ───────────────────────────────────────── +;; Atoms +(epoch 400) +(eval "(ocaml-run \"42\")") +(epoch 401) +(eval "(ocaml-run \"3.14\")") +(epoch 402) +(eval "(ocaml-run \"true\")") +(epoch 403) +(eval "(ocaml-run \"false\")") +(epoch 404) +(eval "(ocaml-run \"\\\"hi\\\"\")") + +;; Arithmetic +(epoch 410) +(eval "(ocaml-run \"1 + 2\")") +(epoch 411) +(eval "(ocaml-run \"10 - 3\")") +(epoch 412) +(eval "(ocaml-run \"4 * 5\")") +(epoch 413) +(eval "(ocaml-run \"20 / 4\")") +(epoch 414) +(eval "(ocaml-run \"10 mod 3\")") +(epoch 415) +(eval "(ocaml-run \"2 ** 10\")") +(epoch 416) +(eval "(ocaml-run \"(1 + 2) * 3\")") +(epoch 417) +(eval "(ocaml-run \"1 + 2 * 3\")") +(epoch 418) +(eval "(ocaml-run \"-5 + 10\")") + +;; Comparison & boolean +(epoch 420) +(eval "(ocaml-run \"1 < 2\")") +(epoch 421) +(eval "(ocaml-run \"3 > 2\")") +(epoch 422) +(eval "(ocaml-run \"2 = 2\")") +(epoch 423) +(eval "(ocaml-run \"1 <> 2\")") +(epoch 424) +(eval "(ocaml-run \"true && false\")") +(epoch 425) +(eval "(ocaml-run \"true || false\")") +(epoch 426) +(eval "(ocaml-run \"not false\")") + +;; String +(epoch 430) +(eval "(ocaml-run \"\\\"a\\\" ^ \\\"b\\\"\")") +(epoch 431) +(eval "(ocaml-run \"\\\"hello\\\" ^ \\\" \\\" ^ \\\"world\\\"\")") + +;; Conditional +(epoch 440) +(eval "(ocaml-run \"if true then 1 else 2\")") +(epoch 441) +(eval "(ocaml-run \"if 1 > 2 then 100 else 200\")") + +;; Let / lambda / app +(epoch 450) +(eval "(ocaml-run \"let x = 5 in x * 2\")") +(epoch 451) +(eval "(ocaml-run \"let f x = x + 1 in f 41\")") +(epoch 452) +(eval "(ocaml-run \"let f x y = x + y in f 3 4\")") +(epoch 453) +(eval "(ocaml-run \"(fun x -> x * x) 7\")") +(epoch 454) +(eval "(ocaml-run \"(fun x -> fun y -> x + y) 10 20\")") +(epoch 455) +(eval "(ocaml-run \"let f = fun x -> x + 1 in f 9\")") + +;; Closures capture +(epoch 460) +(eval "(ocaml-run \"let x = 10 in let f y = x + y in f 5\")") +(epoch 461) +(eval "(ocaml-run \"let make_adder n = fun x -> n + x in (make_adder 100) 1\")") + +;; Recursion +(epoch 470) +(eval "(ocaml-run \"let rec fact n = if n = 0 then 1 else n * fact (n - 1) in fact 5\")") +(epoch 471) +(eval "(ocaml-run \"let rec fib n = if n < 2 then n else fib (n - 1) + fib (n - 2) in fib 10\")") +(epoch 472) +(eval "(ocaml-run \"let rec sum n = if n = 0 then 0 else n + sum (n - 1) in sum 100\")") + +;; Sequence +(epoch 480) +(eval "(ocaml-run \"1; 2; 3\")") +(epoch 481) +(eval "(ocaml-run \"begin 10 end\")") + +;; Programs (top-level decls) +(epoch 490) +(eval "(ocaml-run-program \"let x = 1;; let y = 2;; x + y\")") +(epoch 491) +(eval "(ocaml-run-program \"let rec fact n = if n = 0 then 1 else n * fact (n - 1);; fact 6\")") +(epoch 492) +(eval "(ocaml-run-program \"let inc x = x + 1;; let double x = x * 2;; double (inc 4)\")") + +;; Pipe +(epoch 495) +(eval "(ocaml-run \"let f x = x * 2 in 5 |> f\")") + EPOCHS OUTPUT=$(timeout 60 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -510,6 +618,71 @@ check 327 "trailing ; OK" '("seq" ("int" 1) ("int" 2))' check 328 "begin a; end singleton seq" '("seq" ("var" "a"))' check 329 "match clause body absorbs ;" '("case" ("pwild") ("seq" ("var" "a") ("var" "b")))' +# ── Phase 2: evaluator ────────────────────────────────────────── +# atoms +check 400 "eval int" '42' +check 401 "eval float" '3.14' +check 402 "eval true" 'true' +check 403 "eval false" 'false' +check 404 "eval string" '"hi"' + +# arithmetic +check 410 "eval 1+2" '3' +check 411 "eval 10-3" '7' +check 412 "eval 4*5" '20' +check 413 "eval 20/4" '5' +check 414 "eval 10 mod 3" '1' +check 415 "eval 2 ** 10" '1024' +check 416 "eval (1+2)*3" '9' +check 417 "eval 1+2*3 prec" '7' +check 418 "eval -5+10" '5' + +# comparison & boolean +check 420 "eval 1<2" 'true' +check 421 "eval 3>2" 'true' +check 422 "eval 2=2" 'true' +check 423 "eval 1<>2" 'true' +check 424 "eval true && false" 'false' +check 425 "eval true || false" 'true' +check 426 "eval not false" 'true' + +# string +check 430 'eval "a" ^ "b"' '"ab"' +check 431 "eval string concat 3" '"hello world"' + +# conditional +check 440 "eval if true 1 else 2" '1' +check 441 "eval if 1>2 100 else 200" '200' + +# let / lambda / app +check 450 "eval let x=5 x*2" '10' +check 451 "eval let f x = x+1; f 41" '42' +check 452 "eval let f x y = x+y; f 3 4" '7' +check 453 "eval (fun x -> x*x) 7" '49' +check 454 "eval curried lambdas" '30' +check 455 "eval named lambda" '10' + +# closures +check 460 "eval closure capture" '15' +check 461 "eval make_adder" '101' + +# recursion +check 470 "eval fact 5" '120' +check 471 "eval fib 10" '55' +check 472 "eval sum 100" '5050' + +# sequence +check 480 "eval 1; 2; 3 → 3" '3' +check 481 "eval begin 10 end" '10' + +# programs +check 490 "run-prog x+y" '3' +check 491 "run-prog fact 6" '720' +check 492 "run-prog inc + double" '10' + +# pipe +check 495 "eval x |> f" '10' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 04c61365..01ab6dfd 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -144,13 +144,15 @@ SX CEK evaluator (both JS and OCaml hosts) ### Phase 2 — Core evaluator (untyped) -- [ ] `ocaml-eval` entry: walks OCaml AST, produces SX values. -- [ ] `let`/`let rec`/`let ... in` (mutually recursive with `and`). -- [ ] Lambda + application (curried by default — auto-curry multi-param defs). -- [ ] `fun`/`function` (single-arg lambda with immediate match on arg). -- [ ] `if`/`then`/`else`, `begin`/`end`, sequence `;`. -- [ ] Arithmetic, comparison, boolean ops, string `^`, `mod`. -- [ ] Unit `()` value; `ignore`. +- [x] `ocaml-eval` entry: walks OCaml AST, produces SX values. +- [~] `let`/`let rec`/`let ... in` (single-binding done; mutually recursive + `and` deferred). +- [x] Lambda + application (curried by default — auto-curry multi-param defs). +- [ ] `fun`/`function` (single-arg lambda with immediate match on arg). _(`fun` + done; `function` blocked on parser support.)_ +- [x] `if`/`then`/`else`, `begin`/`end`, sequence `;`. +- [x] Arithmetic, comparison, boolean ops, string `^`, `mod`. +- [x] Unit `()` value; `ignore`. - [ ] References: `ref`, `!`, `:=`. - [ ] Mutable record fields. - [ ] `for i = lo to hi do ... done` loop; `while cond do ... done`. @@ -315,6 +317,16 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-08 Phase 2 — `lib/ocaml/eval.sx`: ocaml-eval + ocaml-run + + ocaml-run-program. Coverage: atoms, var lookup, :app (curried), + :op (arithmetic/comparison/boolean/^/mod/::/|>), :neg, :not, :if, + :seq, :tuple, :list, :fun (auto-curried host-SX closures), :let, + :let-rec (recursive knot via one-element-list mutable cell). Initial + env exposes `not`/`succ`/`pred`/`abs`/`max`/`min`/`fst`/`snd`/`ignore` + as host-SX functions. Tests: literals, arithmetic, comparison, boolean, + string concat, conditionals, lambda + closures + recursion (fact 5, + fib 10, sum 100), sequences, top-level program decls, |> pipe. 165/165 + passing (+42). - 2026-05-07 Phase 1 — sequence operator `;`. Lowest-precedence binary; `e1; e2; e3` → `(:seq e1 e2 e3)`. Two-phase grammar: `parse-expr-no-seq` is the prior expression entry point; new `parse-expr` wraps it with From 9b833a9442158f8664a0c81debc18073cce779f3 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 08:02:56 +0000 Subject: [PATCH 007/298] ocaml: phase 3 pattern matching + constructors (+18 tests, 183 total) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Constructor app: (:app (:con NAME) arg) -> (NAME …args). Tuple args flatten so Pair(a,b) -> ("Pair" a b), matching the parser's pattern flatten. Standalone (:con NAME) -> (NAME) nullary. Pattern matcher: :pwild, :pvar, :plit, :pcon (head + arity), :pcons (decompose), :plist (length match), :ptuple (after tuple tag). Match walks clauses until first success; runtime error on exhaustion. Recursive list functions (len, sum, fact) work end-to-end. --- lib/ocaml/eval.sx | 135 ++++++++++++++++++++++++++++++++++++++++++- lib/ocaml/test.sh | 84 +++++++++++++++++++++++++++ plans/ocaml-on-sx.md | 25 ++++++-- 3 files changed, 237 insertions(+), 7 deletions(-) diff --git a/lib/ocaml/eval.sx b/lib/ocaml/eval.sx index 8fcb3d16..bebe0d26 100644 --- a/lib/ocaml/eval.sx +++ b/lib/ocaml/eval.sx @@ -62,6 +62,115 @@ (define ocaml-eval (fn (ast env) nil)) +;; Pattern matcher — returns the extended env on success, or :fail on +;; mismatch (using the keyword :fail so nil values don't ambiguate). +;; +;; Pattern shapes (from parser): +;; (:pwild) match anything, no binding +;; (:pvar NAME) match anything, bind NAME → val +;; (:plit LITAST) literal compare +;; (:pcon NAME PATS...) ctor: val must be (NAME ARGS...) and arity match +;; (:pcons HEAD TAIL) non-empty list: match head + tail +;; (:plist PATS...) list of exact length, item-wise match +;; (:ptuple PATS...) val must be ("tuple" ITEMS...) of same arity +(define ocaml-match-fail :fail) + +(define ocaml-eval-lit + (fn (lit-ast) + (let ((tag (nth lit-ast 0))) + (cond + ((= tag "int") (nth lit-ast 1)) + ((= tag "float") (nth lit-ast 1)) + ((= tag "string") (nth lit-ast 1)) + ((= tag "char") (nth lit-ast 1)) + ((= tag "bool") (nth lit-ast 1)) + ((= tag "unit") nil) + (else (error (str "ocaml-eval-lit: bad literal " tag))))))) + +(define ocaml-match-pat (fn (pat val env) ocaml-match-fail)) + +(define ocaml-match-list + (fn (pats vals env) + (cond + ((and (= (len pats) 0) (= (len vals) 0)) env) + ((or (= (len pats) 0) (= (len vals) 0)) ocaml-match-fail) + (else + (let ((env2 (ocaml-match-pat (first pats) (first vals) env))) + (cond + ((= env2 ocaml-match-fail) ocaml-match-fail) + (else (ocaml-match-list (rest pats) (rest vals) env2)))))))) + +(set! ocaml-match-pat + (fn (pat val env) + (let ((tag (nth pat 0))) + (cond + ((= tag "pwild") env) + ((= tag "pvar") + (ocaml-env-extend env (nth pat 1) val)) + ((= tag "plit") + (if (= (ocaml-eval-lit (nth pat 1)) val) env ocaml-match-fail)) + ((= tag "pcon") + ;; (:pcon NAME PATS...) — val must be (NAME VALS...) with same arity. + (let ((name (nth pat 1)) (arg-pats (rest (rest pat)))) + (cond + ((and (list? val) (not (empty? val)) (= (first val) name) + (= (len (rest val)) (len arg-pats))) + (ocaml-match-list arg-pats (rest val) env)) + (else ocaml-match-fail)))) + ((= tag "pcons") + ;; (:pcons HEAD TAIL) — val must be a non-empty list. + (cond + ((and (list? val) (not (empty? val)) + (not (and (not (empty? val)) (string? (first val))))) + ;; OCaml lists are SX lists (not tagged like ctors). Match + ;; head pattern against (first val), tail against (rest val). + (let ((env2 (ocaml-match-pat (nth pat 1) (first val) env))) + (cond + ((= env2 ocaml-match-fail) ocaml-match-fail) + (else (ocaml-match-pat (nth pat 2) (rest val) env2))))) + ;; Allow lists whose first element happens to be a string — + ;; ambiguous with ctors; treat them as plain lists. + ((and (list? val) (not (empty? val))) + (let ((env2 (ocaml-match-pat (nth pat 1) (first val) env))) + (cond + ((= env2 ocaml-match-fail) ocaml-match-fail) + (else (ocaml-match-pat (nth pat 2) (rest val) env2))))) + (else ocaml-match-fail))) + ((= tag "plist") + ;; (:plist PATS...) — val must be a list of exact length. + (let ((item-pats (rest pat))) + (cond + ((and (list? val) (= (len val) (len item-pats))) + (ocaml-match-list item-pats val env)) + (else ocaml-match-fail)))) + ((= tag "ptuple") + (let ((item-pats (rest pat))) + (cond + ((and (list? val) (not (empty? val)) + (= (first val) "tuple") + (= (len (rest val)) (len item-pats))) + (ocaml-match-list item-pats (rest val) env)) + (else ocaml-match-fail)))) + (else (error (str "ocaml-match-pat: unknown pattern tag " tag))))))) + +(define ocaml-match-eval + (fn (scrut-ast clauses env) + (let ((val (ocaml-eval scrut-ast env))) + (begin + (define try-clauses + (fn (cs) + (cond + ((empty? cs) + (error (str "ocaml-eval: match failure on " val))) + (else + (let ((clause (first cs))) + (let ((pat (nth clause 1)) (body (nth clause 2))) + (let ((env2 (ocaml-match-pat pat val env))) + (cond + ((= env2 ocaml-match-fail) (try-clauses (rest cs))) + (else (ocaml-eval body env2)))))))))) + (try-clauses clauses))))) + ;; Auto-curry: (:fun ("x" "y" "z") body) → (fn (x) (fn (y) (fn (z) body))). ;; A zero-param lambda evaluates the body immediately on first call — ;; OCaml does not have nullary functions; `()`-taking functions still @@ -153,10 +262,30 @@ (map (fn (e) (ocaml-eval e env)) (rest ast))) ((= tag "fun") (ocaml-make-curried (nth ast 1) (nth ast 2) env)) + ((= tag "con") + ;; Standalone ctor — produces a nullary tagged value. + (list (nth ast 1))) ((= tag "app") - (let ((fn-val (ocaml-eval (nth ast 1) env)) - (arg-val (ocaml-eval (nth ast 2) env))) - (fn-val arg-val))) + (let ((fn-ast (nth ast 1))) + (cond + ;; Constructor application: build a tagged value, flattening + ;; a tuple arg into multiple ctor args (so `Pair (a, b)` + ;; becomes ("Pair" va vb) — matches the parser's pattern + ;; flattening). + ((= (ocaml-tag-of fn-ast) "con") + (let ((name (nth fn-ast 1)) + (arg-val (ocaml-eval (nth ast 2) env))) + (cond + ((and (list? arg-val) (not (empty? arg-val)) + (= (first arg-val) "tuple")) + (cons name (rest arg-val))) + (else (list name arg-val))))) + (else + (let ((fn-val (ocaml-eval fn-ast env)) + (arg-val (ocaml-eval (nth ast 2) env))) + (fn-val arg-val)))))) + ((= tag "match") + (ocaml-match-eval (nth ast 1) (nth ast 2) env)) ((= tag "let") (let ((name (nth ast 1)) (params (nth ast 2)) (rhs (nth ast 3)) (body (nth ast 4))) diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 83dc82a6..be643746 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -430,6 +430,57 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 495) (eval "(ocaml-run \"let f x = x * 2 in 5 |> f\")") +;; ── Phase 3: ADTs + match (eval) ─────────────────────────────── +;; Constructors +(epoch 500) +(eval "(ocaml-run \"None\")") +(epoch 501) +(eval "(ocaml-run \"Some 42\")") +(epoch 502) +(eval "(ocaml-run \"Some (1, 2)\")") + +;; Match — option +(epoch 510) +(eval "(ocaml-run \"match Some 5 with | None -> 0 | Some y -> y\")") +(epoch 511) +(eval "(ocaml-run \"match None with | None -> 0 | Some y -> y\")") + +;; Match — literals +(epoch 520) +(eval "(ocaml-run \"match 3 with | 1 -> 100 | 2 -> 200 | _ -> 999\")") +(epoch 521) +(eval "(ocaml-run \"match true with | true -> 1 | false -> 0\")") +(epoch 522) +(eval "(ocaml-run \"match \\\"hi\\\" with | \\\"hi\\\" -> 1 | _ -> 0\")") + +;; Match — tuples +(epoch 530) +(eval "(ocaml-run \"match (1, 2) with | (a, b) -> a + b\")") +(epoch 531) +(eval "(ocaml-run \"match (1, 2, 3) with | (a, b, c) -> a * b * c\")") + +;; Match — list cons / nil +(epoch 540) +(eval "(ocaml-run \"match [1; 2; 3] with | [] -> 0 | h :: _ -> h\")") +(epoch 541) +(eval "(ocaml-run \"match [] with | [] -> 0 | h :: _ -> h\")") +(epoch 542) +(eval "(ocaml-run \"match [1; 2; 3] with | [a; b; c] -> a + b + c | _ -> 0\")") +(epoch 543) +(eval "(ocaml-run \"let rec len lst = match lst with | [] -> 0 | _ :: t -> 1 + len t in len [1; 2; 3; 4; 5]\")") +(epoch 544) +(eval "(ocaml-run \"let rec sum lst = match lst with | [] -> 0 | h :: t -> h + sum t in sum [1; 2; 3; 4; 5]\")") + +;; Match — wildcard + var +(epoch 550) +(eval "(ocaml-run \"match 99 with | _ -> 1\")") +(epoch 551) +(eval "(ocaml-run \"match 99 with | x -> x + 1\")") + +;; Constructors with tuple args +(epoch 560) +(eval "(ocaml-run \"match Pair (1, 2) with | Pair (a, b) -> a * b\")") + EPOCHS OUTPUT=$(timeout 60 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -683,6 +734,39 @@ check 492 "run-prog inc + double" '10' # pipe check 495 "eval x |> f" '10' +# ── Phase 3: ADTs + match (eval) ──────────────────────────────── +# constructors +check 500 "eval None" '("None")' +check 501 "eval Some 42" '("Some" 42)' +check 502 "eval Pair tuple-arg" '("Some" 1 2)' + +# option match +check 510 "match Some 5 -> 5" '5' +check 511 "match None -> 0" '0' + +# literal match +check 520 "match 3 -> _ -> 999" '999' +check 521 "match bool true" '1' +check 522 "match string lit" '1' + +# tuple match +check 530 "match (1,2)" '3' +check 531 "match (1,2,3)" '6' + +# list match +check 540 "match list cons head" '1' +check 541 "match empty list" '0' +check 542 "match list literal pat" '6' +check 543 "match recursive len" '5' +check 544 "match recursive sum" '15' + +# wildcard + var +check 550 "match _ -> 1" '1' +check 551 "match x -> x+1" '100' + +# ctor with tuple arg +check 560 "Pair(a,b) → a*b" '2' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 01ab6dfd..e8ce1135 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -162,10 +162,13 @@ SX CEK evaluator (both JS and OCaml hosts) ### Phase 3 — ADTs + pattern matching - [ ] `type` declarations: `type t = A | B of t1 * t2 | C of { x: int }`. -- [ ] Constructors as tagged lists: `A` → `(:A)`, `B(1, "x")` → `(:B 1 "x")`. -- [ ] `match`/`with`: constructor, literal, variable, wildcard, tuple, list cons/nil, - `as` binding, or-patterns, nested patterns, `when` guard. -- [ ] Exhaustiveness: runtime error on incomplete match (no compile-time check yet). + _(Parser + evaluator currently inferred-arity at runtime; type decls + pending.)_ +- [x] Constructors as tagged lists: `A` → `("A")`, `B(1, "x")` → `("B" 1 "x")`. +- [~] `match`/`with`: constructor, literal, variable, wildcard, tuple, list + cons/nil, nested patterns. _(Pending: `as` binding, or-patterns, + `when` guard.)_ +- [x] Exhaustiveness: runtime error on incomplete match (no compile-time check yet). - [ ] Built-in types: `option` (`None`/`Some`), `result` (`Ok`/`Error`), `list` (nil/cons), `bool`, `unit`, `exn`. - [ ] `exception` declarations; built-in: `Not_found`, `Invalid_argument`, @@ -317,6 +320,20 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-08 Phase 3 — pattern matching evaluator + constructors (+18 + tests, 183 total). Constructor application: `(:app (:con NAME) arg)` + builds a tagged list `(NAME …args)` with tuple args flattened (so + `Pair (a, b)` → `("Pair" a b)` matches the parser's pattern flatten). + Standalone ctor `(:con NAME)` → `(NAME)` (nullary). Pattern matcher: + :pwild / :pvar / :plit (unboxed compare) / :pcon (head + arity match) / + :pcons (cons-decompose) / :plist (length+items) / :ptuple (after `tuple` + tag). Match drives clauses until first success; runtime error on + exhaustion. Tested with option match, literal match, tuple match, + recursive list functions (`len`, `sum`), nested ctor (`Pair(a,b)`). + Note: arity flattening happens for any tuple-arg ctor — without ADT + declarations there's no way to distinguish `Some (1,2)` (single tuple + payload) from `Pair (1,2)` (two-arg ctor). All-flatten convention is + consistent across parser + evaluator. - 2026-05-08 Phase 2 — `lib/ocaml/eval.sx`: ocaml-eval + ocaml-run + ocaml-run-program. Coverage: atoms, var lookup, :app (curried), :op (arithmetic/comparison/boolean/^/mod/::/|>), :neg, :not, :if, From a11f3c33b6831c4c91f189962b66cc81564abbfa Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 08:07:26 +0000 Subject: [PATCH 008/298] ocaml: phase 2 references ref/!/:= (+6 tests, 189 total) ref is a builtin boxing its arg in a one-element list. Prefix ! parses to (:deref ...) and reads via (nth cell 0). := joins the binop precedence table at level 1 right-assoc and mutates via set-nth!. Closures share the underlying cell. --- lib/ocaml/eval.sx | 24 +++++++++++++++++++----- lib/ocaml/parser.sx | 3 +++ lib/ocaml/test.sh | 22 ++++++++++++++++++++++ plans/ocaml-on-sx.md | 8 +++++++- 4 files changed, 51 insertions(+), 6 deletions(-) diff --git a/lib/ocaml/eval.sx b/lib/ocaml/eval.sx index bebe0d26..25d0226c 100644 --- a/lib/ocaml/eval.sx +++ b/lib/ocaml/eval.sx @@ -38,7 +38,10 @@ (list "min" (fn (a) (fn (b) (if (< a b) a b)))) (list "fst" (fn (p) (nth p 1))) (list "snd" (fn (p) (nth p 2))) - (list "ignore" (fn (x) nil))))) + (list "ignore" (fn (x) nil)) + ;; References. A ref cell is a one-element list; ! reads it and + ;; := mutates it via set-nth!. + (list "ref" (fn (x) (list x)))))) (define ocaml-env-lookup (fn (env name) @@ -235,11 +238,22 @@ (else (error (str "ocaml-eval: unbound variable " name)))))) ((= tag "neg") (- 0 (ocaml-eval (nth ast 1) env))) ((= tag "not") (not (ocaml-eval (nth ast 1) env))) + ((= tag "deref") + (let ((cell (ocaml-eval (nth ast 1) env))) + (nth cell 0))) ((= tag "op") - (ocaml-eval-op - (nth ast 1) - (ocaml-eval (nth ast 2) env) - (ocaml-eval (nth ast 3) env))) + (let ((op (nth ast 1))) + (cond + ;; := mutates the lhs cell — short-circuit before generic + ;; eval-op so we still evaluate lhs (to obtain the cell). + ((= op ":=") + (let ((cell (ocaml-eval (nth ast 2) env)) + (new-val (ocaml-eval (nth ast 3) env))) + (begin (set-nth! cell 0 new-val) nil))) + (else + (ocaml-eval-op op + (ocaml-eval (nth ast 2) env) + (ocaml-eval (nth ast 3) env)))))) ((= tag "if") (if (ocaml-eval (nth ast 1) env) (ocaml-eval (nth ast 2) env) diff --git a/lib/ocaml/parser.sx b/lib/ocaml/parser.sx index 6c48ee9b..07df0622 100644 --- a/lib/ocaml/parser.sx +++ b/lib/ocaml/parser.sx @@ -48,6 +48,7 @@ (define ocaml-op-table (list + (list ":=" 1 :right) (list "||" 2 :right) (list "or" 2 :right) (list "&&" 3 :right) @@ -401,6 +402,8 @@ (cond ((at-op? "-") (begin (advance-tok!) (list :neg (parse-prefix)))) + ((at-op? "!") + (begin (advance-tok!) (list :deref (parse-prefix)))) ((at-kw? "not") (begin (advance-tok!) (list :not (parse-prefix)))) (else (parse-app))))) diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index be643746..d06fda43 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -481,6 +481,20 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 560) (eval "(ocaml-run \"match Pair (1, 2) with | Pair (a, b) -> a * b\")") +;; ── References (ref / ! / :=) ────────────────────────────────── +(epoch 600) +(eval "(ocaml-run \"let r = ref 5 in !r\")") +(epoch 601) +(eval "(ocaml-run \"let r = ref 5 in r := 10; !r\")") +(epoch 602) +(eval "(ocaml-run \"let r = ref 0 in r := !r + 1; r := !r + 1; !r\")") +(epoch 603) +(eval "(ocaml-run \"let r = ref 100 in let f x = r := !r + x in f 5; f 10; !r\")") +(epoch 604) +(eval "(ocaml-run \"let r = ref \\\"a\\\" in r := \\\"b\\\"; !r\")") +(epoch 605) +(eval "(ocaml-run \"let count = ref 0 in let rec loop n = if n = 0 then !count else (count := !count + n; loop (n - 1)) in loop 5\")") + EPOCHS OUTPUT=$(timeout 60 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -767,6 +781,14 @@ check 551 "match x -> x+1" '100' # ctor with tuple arg check 560 "Pair(a,b) → a*b" '2' +# ── References ────────────────────────────────────────────────── +check 600 "deref new ref" '5' +check 601 ":= then deref" '10' +check 602 "increment cell twice" '2' +check 603 "ref captured by closure" '115' +check 604 "ref of string" '"b"' +check 605 "ref + recursion" '15' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index e8ce1135..77c018ec 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -153,7 +153,7 @@ SX CEK evaluator (both JS and OCaml hosts) - [x] `if`/`then`/`else`, `begin`/`end`, sequence `;`. - [x] Arithmetic, comparison, boolean ops, string `^`, `mod`. - [x] Unit `()` value; `ignore`. -- [ ] References: `ref`, `!`, `:=`. +- [x] References: `ref`, `!`, `:=`. - [ ] Mutable record fields. - [ ] `for i = lo to hi do ... done` loop; `while cond do ... done`. - [ ] `try`/`with` — maps to SX `guard`; `raise` via perform. @@ -320,6 +320,12 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-08 Phase 2 — references (`ref`/`!`/`:=`). `ref` is a builtin + that boxes its argument in a one-element list (the mutable cell); + prefix `!` parses to `(:deref EXPR)` and reads `(nth cell 0)`; `:=` + joins the precedence table at the lowest binop level (right-assoc) and + short-circuits in eval to mutate via `set-nth!`. Closures capture refs + by sharing the underlying list. 189/189 (+6). - 2026-05-08 Phase 3 — pattern matching evaluator + constructors (+18 tests, 183 total). Constructor application: `(:app (:con NAME) arg)` builds a tagged list `(NAME …args)` with tuple args flattened (so From 9b8b0b43257642892e25fec85161f002e2153c65 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 08:11:13 +0000 Subject: [PATCH 009/298] ocaml: phase 2 for/while loops (+5 tests, 194 total) Parser: for i = lo to|downto hi do body done, while cond do body done. AST: (:for NAME LO HI :ascend|:descend BODY) and (:while COND BODY). Eval re-binds the loop var per iteration; both forms evaluate to unit. --- lib/ocaml/eval.sx | 45 ++++++++++++++++++++++++++++++++++++++++++++ lib/ocaml/parser.sx | 29 ++++++++++++++++++++++++++++ lib/ocaml/test.sh | 19 +++++++++++++++++++ plans/ocaml-on-sx.md | 6 +++++- 4 files changed, 98 insertions(+), 1 deletion(-) diff --git a/lib/ocaml/eval.sx b/lib/ocaml/eval.sx index 25d0226c..e6176172 100644 --- a/lib/ocaml/eval.sx +++ b/lib/ocaml/eval.sx @@ -300,6 +300,51 @@ (fn-val arg-val)))))) ((= tag "match") (ocaml-match-eval (nth ast 1) (nth ast 2) env)) + ((= tag "for") + ;; (:for NAME LO HI DIR BODY) — DIR is "ascend" or "descend". + (let ((name (nth ast 1)) + (lo (ocaml-eval (nth ast 2) env)) + (hi (ocaml-eval (nth ast 3) env)) + (dir (nth ast 4)) + (body (nth ast 5))) + (begin + (cond + ((= dir "ascend") + (let ((i lo)) + (begin + (define loop + (fn () + (when (<= i hi) + (begin + (ocaml-eval body + (ocaml-env-extend env name i)) + (set! i (+ i 1)) + (loop))))) + (loop)))) + ((= dir "descend") + (let ((i lo)) + (begin + (define loop + (fn () + (when (>= i hi) + (begin + (ocaml-eval body + (ocaml-env-extend env name i)) + (set! i (- i 1)) + (loop))))) + (loop))))) + nil))) + ((= tag "while") + (let ((cond-ast (nth ast 1)) (body (nth ast 2))) + (begin + (define loop + (fn () + (when (ocaml-eval cond-ast env) + (begin + (ocaml-eval body env) + (loop))))) + (loop) + nil))) ((= tag "let") (let ((name (nth ast 1)) (params (nth ast 2)) (rhs (nth ast 3)) (body (nth ast 4))) diff --git a/lib/ocaml/parser.sx b/lib/ocaml/parser.sx index 07df0622..3e548b5c 100644 --- a/lib/ocaml/parser.sx +++ b/lib/ocaml/parser.sx @@ -573,6 +573,33 @@ (begin (advance-tok!) (one) (loop))))) (loop) (cons :match (cons scrut (list cases))))))))) + (define parse-for + (fn () + (let ((name (ocaml-tok-value (consume! "ident" nil)))) + (begin + (consume! "op" "=") + (let ((lo (parse-expr-no-seq))) + (let ((dir + (cond + ((at-kw? "to") (begin (advance-tok!) :ascend)) + ((at-kw? "downto") (begin (advance-tok!) :descend)) + (else (error "ocaml-parse: expected to/downto in for"))))) + (let ((hi (parse-expr-no-seq))) + (begin + (consume! "keyword" "do") + (let ((body (parse-expr))) + (begin + (consume! "keyword" "done") + (list :for name lo hi dir body))))))))))) + (define parse-while + (fn () + (let ((cond-expr (parse-expr-no-seq))) + (begin + (consume! "keyword" "do") + (let ((body (parse-expr))) + (begin + (consume! "keyword" "done") + (list :while cond-expr body))))))) (set! parse-expr-no-seq (fn @@ -582,6 +609,8 @@ ((at-kw? "let") (begin (advance-tok!) (parse-let))) ((at-kw? "if") (begin (advance-tok!) (parse-if))) ((at-kw? "match") (begin (advance-tok!) (parse-match))) + ((at-kw? "for") (begin (advance-tok!) (parse-for))) + ((at-kw? "while") (begin (advance-tok!) (parse-while))) (else (parse-tuple))))) (set! parse-expr diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index d06fda43..abd32a5d 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -495,6 +495,18 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 605) (eval "(ocaml-run \"let count = ref 0 in let rec loop n = if n = 0 then !count else (count := !count + n; loop (n - 1)) in loop 5\")") +;; ── for / while loops ────────────────────────────────────────── +(epoch 620) +(eval "(ocaml-run \"let s = ref 0 in for i = 1 to 5 do s := !s + i done; !s\")") +(epoch 621) +(eval "(ocaml-run \"let s = ref 0 in for i = 5 downto 1 do s := !s + i done; !s\")") +(epoch 622) +(eval "(ocaml-run \"let i = ref 0 in let s = ref 0 in while !i < 5 do i := !i + 1; s := !s + !i done; !s\")") +(epoch 623) +(eval "(ocaml-run \"let s = ref 0 in for i = 1 to 100 do s := !s + i done; !s\")") +(epoch 624) +(eval "(ocaml-run \"let p = ref 1 in for i = 1 to 5 do p := !p * i done; !p\")") + EPOCHS OUTPUT=$(timeout 60 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -789,6 +801,13 @@ check 603 "ref captured by closure" '115' check 604 "ref of string" '"b"' check 605 "ref + recursion" '15' +# ── for / while ───────────────────────────────────────────────── +check 620 "for 1..5 sum" '15' +check 621 "for 5 downto 1 sum" '15' +check 622 "while loop" '15' +check 623 "for 1..100 sum" '5050' +check 624 "for 1..5 product = 120" '120' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 77c018ec..4c0975e2 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -155,7 +155,8 @@ SX CEK evaluator (both JS and OCaml hosts) - [x] Unit `()` value; `ignore`. - [x] References: `ref`, `!`, `:=`. - [ ] Mutable record fields. -- [ ] `for i = lo to hi do ... done` loop; `while cond do ... done`. +- [x] `for i = lo to hi do ... done` loop; `while cond do ... done` (incl. + `downto` direction). - [ ] `try`/`with` — maps to SX `guard`; `raise` via perform. - [ ] Tests in `lib/ocaml/tests/eval.sx` — 50+ tests, pure + imperative. @@ -320,6 +321,9 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-08 Phase 2 — `for`/`while` loops. `(:for NAME LO HI DIR BODY)` + with `:ascend`/`:descend` direction (`to`/`downto`); `(:while COND BODY)`. + Both eval to unit and re-bind the loop var per iteration. 194/194 (+5). - 2026-05-08 Phase 2 — references (`ref`/`!`/`:=`). `ref` is a builtin that boxes its argument in a one-element list (the mutable cell); prefix `!` parses to `(:deref EXPR)` and reads `(nth cell 0)`; `:=` From 937342bbf08cf9903bc3e28aeb5e651aaf683b48 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 08:15:38 +0000 Subject: [PATCH 010/298] ocaml: phase 2 function | pat -> body (+4 tests, 198 total) Sugar for fun + match. AST (:function CLAUSES) -> unary closure that runs ocaml-match-clauses on its arg. let rec recognises :function as a recursive rhs and ties the knot via cell, so let rec map f = function | [] -> [] | h::t -> f h :: map f t works. ocaml-match-eval refactored to share clause-walk with function. --- lib/ocaml/eval.sx | 119 ++++++++++++++++++++++++------------------- lib/ocaml/parser.sx | 23 +++++++++ lib/ocaml/test.sh | 16 ++++++ plans/ocaml-on-sx.md | 10 +++- 4 files changed, 114 insertions(+), 54 deletions(-) diff --git a/lib/ocaml/eval.sx b/lib/ocaml/eval.sx index e6176172..f7dd1f8a 100644 --- a/lib/ocaml/eval.sx +++ b/lib/ocaml/eval.sx @@ -156,23 +156,26 @@ (else ocaml-match-fail)))) (else (error (str "ocaml-match-pat: unknown pattern tag " tag))))))) +(define ocaml-match-clauses + (fn (val clauses env) + (begin + (define try-clauses + (fn (cs) + (cond + ((empty? cs) + (error (str "ocaml-eval: match failure on " val))) + (else + (let ((clause (first cs))) + (let ((pat (nth clause 1)) (body (nth clause 2))) + (let ((env2 (ocaml-match-pat pat val env))) + (cond + ((= env2 ocaml-match-fail) (try-clauses (rest cs))) + (else (ocaml-eval body env2)))))))))) + (try-clauses clauses)))) + (define ocaml-match-eval (fn (scrut-ast clauses env) - (let ((val (ocaml-eval scrut-ast env))) - (begin - (define try-clauses - (fn (cs) - (cond - ((empty? cs) - (error (str "ocaml-eval: match failure on " val))) - (else - (let ((clause (first cs))) - (let ((pat (nth clause 1)) (body (nth clause 2))) - (let ((env2 (ocaml-match-pat pat val env))) - (cond - ((= env2 ocaml-match-fail) (try-clauses (rest cs))) - (else (ocaml-eval body env2)))))))))) - (try-clauses clauses))))) + (ocaml-match-clauses (ocaml-eval scrut-ast env) clauses env))) ;; Auto-curry: (:fun ("x" "y" "z") body) → (fn (x) (fn (y) (fn (z) body))). ;; A zero-param lambda evaluates the body immediately on first call — @@ -300,6 +303,11 @@ (fn-val arg-val)))))) ((= tag "match") (ocaml-match-eval (nth ast 1) (nth ast 2) env)) + ((= tag "function") + ;; `function | pat -> body | …` — produces a unary closure that + ;; matches its argument against the clauses. + (let ((clauses (nth ast 1)) (captured env)) + (fn (arg) (ocaml-match-clauses arg clauses captured)))) ((= tag "for") ;; (:for NAME LO HI DIR BODY) — DIR is "ascend" or "descend". (let ((name (nth ast 1)) @@ -354,30 +362,30 @@ (ocaml-make-curried params rhs env)))) (ocaml-eval body (ocaml-env-extend env name rhs-val))))) ((= tag "let-rec") - ;; For function bindings: tie the knot via a mutable cell. The - ;; placeholder closure that's bound first dereferences the cell - ;; on each call, so the function can call itself once the cell - ;; is set to the real closure. + ;; Tie the knot via a mutable cell when rhs is function-typed. + ;; The placeholder closure dereferences the cell on each call. (let ((name (nth ast 1)) (params (nth ast 2)) (rhs (nth ast 3)) (body (nth ast 4))) - (cond - ((= (len params) 0) - ;; Non-functional let-rec — OCaml only allows this when the - ;; rhs is "syntactically a function or constructor". For the - ;; common case of a value, evaluate non-recursively. - (let ((rhs-val (ocaml-eval rhs env))) - (ocaml-eval body (ocaml-env-extend env name rhs-val)))) - (else - ;; Use a one-element list as a mutable cell to tie the - ;; recursive knot. The placeholder closure dereferences - ;; the cell on each call. - (let ((cell (list nil))) - (let ((env2 (ocaml-env-extend env name - (fn (arg) ((nth cell 0) arg))))) - (let ((rhs-val (ocaml-make-curried params rhs env2))) - (begin - (set-nth! cell 0 rhs-val) - (ocaml-eval body env2))))))))) + (let ((rhs-fn? + (or (> (len params) 0) + (= (ocaml-tag-of rhs) "fun") + (= (ocaml-tag-of rhs) "function")))) + (cond + (rhs-fn? + (let ((cell (list nil))) + (let ((env2 (ocaml-env-extend env name + (fn (arg) ((nth cell 0) arg))))) + (let ((rhs-val + (if (= (len params) 0) + (ocaml-eval rhs env2) + (ocaml-make-curried params rhs env2)))) + (begin + (set-nth! cell 0 rhs-val) + (ocaml-eval body env2)))))) + (else + (let ((rhs-val (ocaml-eval rhs env))) + (ocaml-eval body + (ocaml-env-extend env name rhs-val)))))))) (else (error (str "ocaml-eval: unknown AST tag " tag))))))) @@ -406,21 +414,28 @@ (set! last v))))) ((= tag "def-rec") (let ((name (nth decl 1)) (params (nth decl 2)) (rhs (nth decl 3))) - (cond - ((= (len params) 0) - (let ((v (ocaml-eval rhs env))) - (begin - (set! env (ocaml-env-extend env name v)) - (set! last v)))) - (else - (let ((cell (list nil))) - (let ((env2 (ocaml-env-extend env name - (fn (arg) ((nth cell 0) arg))))) - (let ((v (ocaml-make-curried params rhs env2))) - (begin - (set-nth! cell 0 v) - (set! env env2) - (set! last v))))))))) + (let ((rhs-fn? + (or (> (len params) 0) + (= (ocaml-tag-of rhs) "fun") + (= (ocaml-tag-of rhs) "function")))) + (cond + (rhs-fn? + (let ((cell (list nil))) + (let ((env2 (ocaml-env-extend env name + (fn (arg) ((nth cell 0) arg))))) + (let ((v + (if (= (len params) 0) + (ocaml-eval rhs env2) + (ocaml-make-curried params rhs env2)))) + (begin + (set-nth! cell 0 v) + (set! env env2) + (set! last v)))))) + (else + (let ((v (ocaml-eval rhs env))) + (begin + (set! env (ocaml-env-extend env name v)) + (set! last v)))))))) ((= tag "expr") (set! last (ocaml-eval (nth decl 1) env))) (else (error (str "ocaml-run-program: bad decl " tag))))))) diff --git a/lib/ocaml/parser.sx b/lib/ocaml/parser.sx index 3e548b5c..8e93415a 100644 --- a/lib/ocaml/parser.sx +++ b/lib/ocaml/parser.sx @@ -573,6 +573,28 @@ (begin (advance-tok!) (one) (loop))))) (loop) (cons :match (cons scrut (list cases))))))))) + (define parse-function + (fn () + ;; `function | pat -> body | …` ≡ fun x -> match x with | pat -> body + (let () + (begin + (when (at-op? "|") (advance-tok!)) + (let ((cases (list))) + (begin + (define one + (fn () + (let ((p (parse-pattern))) + (begin + (consume! "op" "->") + (let ((body (parse-expr))) + (append! cases (list :case p body))))))) + (one) + (define loop + (fn () + (when (at-op? "|") + (begin (advance-tok!) (one) (loop))))) + (loop) + (list :function cases))))))) (define parse-for (fn () (let ((name (ocaml-tok-value (consume! "ident" nil)))) @@ -609,6 +631,7 @@ ((at-kw? "let") (begin (advance-tok!) (parse-let))) ((at-kw? "if") (begin (advance-tok!) (parse-if))) ((at-kw? "match") (begin (advance-tok!) (parse-match))) + ((at-kw? "function") (begin (advance-tok!) (parse-function))) ((at-kw? "for") (begin (advance-tok!) (parse-for))) ((at-kw? "while") (begin (advance-tok!) (parse-while))) (else (parse-tuple))))) diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index abd32a5d..ac93a4a7 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -507,6 +507,16 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 624) (eval "(ocaml-run \"let p = ref 1 in for i = 1 to 5 do p := !p * i done; !p\")") +;; ── function (sugar for fun + match) ─────────────────────────── +(epoch 640) +(eval "(ocaml-run \"(function | None -> 0 | Some x -> x) (Some 7)\")") +(epoch 641) +(eval "(ocaml-run \"let f = function | None -> 0 | Some x -> x in f None\")") +(epoch 642) +(eval "(ocaml-run \"let rec len = function | [] -> 0 | _ :: t -> 1 + len t in len [1; 2; 3]\")") +(epoch 643) +(eval "(ocaml-run-program \"let rec map f = function | [] -> [] | h :: t -> f h :: map f t;; map (fun x -> x * x) [1; 2; 3; 4]\")") + EPOCHS OUTPUT=$(timeout 60 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -808,6 +818,12 @@ check 622 "while loop" '15' check 623 "for 1..100 sum" '5050' check 624 "for 1..5 product = 120" '120' +# ── function ──────────────────────────────────────────────────── +check 640 "function None|Some Some 7" '7' +check 641 "function None=0" '0' +check 642 "rec function len" '3' +check 643 "rec function map x*x" '(1 4 9 16)' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 4c0975e2..6cfb681c 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -148,8 +148,7 @@ SX CEK evaluator (both JS and OCaml hosts) - [~] `let`/`let rec`/`let ... in` (single-binding done; mutually recursive `and` deferred). - [x] Lambda + application (curried by default — auto-curry multi-param defs). -- [ ] `fun`/`function` (single-arg lambda with immediate match on arg). _(`fun` - done; `function` blocked on parser support.)_ +- [x] `fun`/`function` (single-arg lambda with immediate match on arg). - [x] `if`/`then`/`else`, `begin`/`end`, sequence `;`. - [x] Arithmetic, comparison, boolean ops, string `^`, `mod`. - [x] Unit `()` value; `ignore`. @@ -321,6 +320,13 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-08 Phase 2 — `function | pat -> body | …` parser + eval. + Sugar for `fun x -> match x with | …`. AST: `(:function CLAUSES)` + evaluated to a unary closure that runs `ocaml-match-clauses` on the + argument. `let rec` knot also triggers when rhs is `:function`, so + `let rec map f = function | [] -> [] | h::t -> f h :: map f t` works. + ocaml-match-eval refactored to share `ocaml-match-clauses` with the + function form. 198/198 (+4). - 2026-05-08 Phase 2 — `for`/`while` loops. `(:for NAME LO HI DIR BODY)` with `:ascend`/`:descend` direction (`to`/`downto`); `(:while COND BODY)`. Both eval to unit and re-bind the loop var per iteration. 194/194 (+5). From 6a1f63f0d103d3c29a27b0c42456d21a933aa2e5 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 08:20:11 +0000 Subject: [PATCH 011/298] ocaml: phase 2 try/with + raise (+6 tests, 204 total) Parser: try EXPR with | pat -> handler | ... -> (:try EXPR CLAUSES). Eval delegates to SX guard with else matching the raised value against clause patterns; re-raises on no-match. raise/failwith/invalid_arg shipped as builtins. failwith "msg" raises ("Failure" msg) so | Failure msg -> ... patterns match. --- lib/ocaml/eval.sx | 30 +++++++++++++++++++++++++++++- lib/ocaml/parser.sx | 23 +++++++++++++++++++++++ lib/ocaml/test.sh | 22 ++++++++++++++++++++++ plans/ocaml-on-sx.md | 9 ++++++++- 4 files changed, 82 insertions(+), 2 deletions(-) diff --git a/lib/ocaml/eval.sx b/lib/ocaml/eval.sx index f7dd1f8a..d28e950b 100644 --- a/lib/ocaml/eval.sx +++ b/lib/ocaml/eval.sx @@ -41,7 +41,13 @@ (list "ignore" (fn (x) nil)) ;; References. A ref cell is a one-element list; ! reads it and ;; := mutates it via set-nth!. - (list "ref" (fn (x) (list x)))))) + (list "ref" (fn (x) (list x))) + ;; Exceptions: `raise e` invokes the host-SX raise; values are + ;; tagged like other ctors so `try ... with | Exn x -> handler` + ;; can pattern-match them. + (list "raise" (fn (e) (raise e))) + (list "failwith" (fn (msg) (raise (list "Failure" msg)))) + (list "invalid_arg" (fn (msg) (raise (list "Invalid_argument" msg))))))) (define ocaml-env-lookup (fn (env name) @@ -342,6 +348,28 @@ (loop))))) (loop))))) nil))) + ((= tag "try") + ;; (:try EXPR CLAUSES) — evaluate EXPR; if it raises, match the + ;; raised value against CLAUSES. Re-raise on no-match. + (let ((expr (nth ast 1)) (clauses (nth ast 2)) (env-cap env)) + (guard (e + (else + (begin + (define try-clauses + (fn (cs) + (cond + ((empty? cs) (raise e)) + (else + (let ((clause (first cs))) + (let ((pat (nth clause 1)) + (body (nth clause 2))) + (let ((env2 (ocaml-match-pat pat e env-cap))) + (cond + ((= env2 ocaml-match-fail) + (try-clauses (rest cs))) + (else (ocaml-eval body env2)))))))))) + (try-clauses clauses)))) + (ocaml-eval expr env-cap)))) ((= tag "while") (let ((cond-ast (nth ast 1)) (body (nth ast 2))) (begin diff --git a/lib/ocaml/parser.sx b/lib/ocaml/parser.sx index 8e93415a..d55d101e 100644 --- a/lib/ocaml/parser.sx +++ b/lib/ocaml/parser.sx @@ -573,6 +573,28 @@ (begin (advance-tok!) (one) (loop))))) (loop) (cons :match (cons scrut (list cases))))))))) + (define parse-try + (fn () + (let ((expr (parse-expr-no-seq))) + (begin + (consume! "keyword" "with") + (when (at-op? "|") (advance-tok!)) + (let ((cases (list))) + (begin + (define one + (fn () + (let ((p (parse-pattern))) + (begin + (consume! "op" "->") + (let ((body (parse-expr))) + (append! cases (list :case p body))))))) + (one) + (define loop + (fn () + (when (at-op? "|") + (begin (advance-tok!) (one) (loop))))) + (loop) + (list :try expr cases))))))) (define parse-function (fn () ;; `function | pat -> body | …` ≡ fun x -> match x with | pat -> body @@ -634,6 +656,7 @@ ((at-kw? "function") (begin (advance-tok!) (parse-function))) ((at-kw? "for") (begin (advance-tok!) (parse-for))) ((at-kw? "while") (begin (advance-tok!) (parse-while))) + ((at-kw? "try") (begin (advance-tok!) (parse-try))) (else (parse-tuple))))) (set! parse-expr diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index ac93a4a7..df1039aa 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -517,6 +517,20 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 643) (eval "(ocaml-run-program \"let rec map f = function | [] -> [] | h :: t -> f h :: map f t;; map (fun x -> x * x) [1; 2; 3; 4]\")") +;; ── try / with / raise ───────────────────────────────────────── +(epoch 660) +(eval "(ocaml-run \"try 1 + 2 with | _ -> 0\")") +(epoch 661) +(eval "(ocaml-run \"try raise (Foo 5) with | Foo x -> x | Bar -> 99\")") +(epoch 662) +(eval "(ocaml-run \"try raise Bar with | Foo x -> x | Bar -> 99\")") +(epoch 663) +(eval "(ocaml-run \"try failwith \\\"oops\\\" with | Failure msg -> msg\")") +(epoch 664) +(eval "(ocaml-run \"try (raise (Foo 1); 999) with | Foo x -> x + 100\")") +(epoch 665) +(eval "(ocaml-run \"let f x = if x < 0 then raise (NegArg x) else x * 2 in try f (-5) with | NegArg n -> n\")") + EPOCHS OUTPUT=$(timeout 60 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -824,6 +838,14 @@ check 641 "function None=0" '0' check 642 "rec function len" '3' check 643 "rec function map x*x" '(1 4 9 16)' +# ── try / with / raise ────────────────────────────────────────── +check 660 "try success" '3' +check 661 "try Foo caught" '5' +check 662 "try Bar caught" '99' +check 663 "try failwith" '"oops"' +check 664 "try sequence raises" '101' +check 665 "raise from function" '-5' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 6cfb681c..e9609dd7 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -156,7 +156,8 @@ SX CEK evaluator (both JS and OCaml hosts) - [ ] Mutable record fields. - [x] `for i = lo to hi do ... done` loop; `while cond do ... done` (incl. `downto` direction). -- [ ] `try`/`with` — maps to SX `guard`; `raise` via perform. +- [x] `try`/`with` — maps to SX `guard`; `raise` is a builtin that calls + host SX `raise`. `failwith` and `invalid_arg` ship as builtins. - [ ] Tests in `lib/ocaml/tests/eval.sx` — 50+ tests, pure + imperative. ### Phase 3 — ADTs + pattern matching @@ -320,6 +321,12 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-08 Phase 2 — `try`/`with` + `raise` builtin. Parser produces + `(:try EXPR CLAUSES)`; eval delegates to SX `guard` with `else` + matching the raised value against clause patterns and re-raising on + no-match. `raise`/`failwith`/`invalid_arg` exposed as builtins; + failwith builds `("Failure" msg)` so `Failure msg -> ...` patterns + match. 204/204 (+6). - 2026-05-08 Phase 2 — `function | pat -> body | …` parser + eval. Sugar for `fun x -> match x with | …`. AST: `(:function CLAUSES)` evaluated to a unary closure that runs `ocaml-match-clauses` on the From 317f93b2afa5206f158e2a5a74e7f969c53eb0ed Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 08:33:34 +0000 Subject: [PATCH 012/298] ocaml: phase 4 modules + field access (+11 tests, 215 total) module M = struct DECLS end parsed by sub-tokenising the body source between struct and the matching end (nesting tracked via struct/begin/ sig/end). Field access is a postfix layer above parse-atom, binding tighter than application: f r.x -> (:app f (:field r "x")). Eval (:module-def NAME DECLS) builds a dict via ocaml-eval-module running decls in a sub-env. (:field EXPR NAME) looks up dict fields, treating (:con NAME) heads as module-name lookups instead of nullary ctors so M.x works with M as a module. Phase 4 LOC so far: ~110 lines (well under 2000 budget). --- lib/ocaml/eval.sx | 85 ++++++++++++++++++++++++++++++++++++++++++++ lib/ocaml/parser.sx | 64 +++++++++++++++++++++++++++++++-- lib/ocaml/test.sh | 37 +++++++++++++++++++ plans/ocaml-on-sx.md | 17 +++++++-- 4 files changed, 199 insertions(+), 4 deletions(-) diff --git a/lib/ocaml/eval.sx b/lib/ocaml/eval.sx index d28e950b..7c373c2a 100644 --- a/lib/ocaml/eval.sx +++ b/lib/ocaml/eval.sx @@ -314,6 +314,23 @@ ;; matches its argument against the clauses. (let ((clauses (nth ast 1)) (captured env)) (fn (arg) (ocaml-match-clauses arg clauses captured)))) + ((= tag "field") + ;; `e.name` — evaluate e, expect a dict (record/module), get name. + ;; Special case: `(:field (:con "M") "x")` looks up M as a module + ;; binding rather than evaluating it as a nullary ctor. + (let ((target-ast (nth ast 1)) (fname (nth ast 2))) + (let ((target + (cond + ((= (ocaml-tag-of target-ast) "con") + (cond + ((ocaml-env-has? env (nth target-ast 1)) + (ocaml-env-lookup env (nth target-ast 1))) + (else (list (nth target-ast 1))))) + (else (ocaml-eval target-ast env))))) + (cond + ((dict? target) (get target fname)) + (else (error + (str "ocaml-eval: not a record/module on .field: " target))))))) ((= tag "for") ;; (:for NAME LO HI DIR BODY) — DIR is "ascend" or "descend". (let ((name (nth ast 1)) @@ -417,6 +434,64 @@ (else (error (str "ocaml-eval: unknown AST tag " tag))))))) +;; ocaml-eval-module — evaluate a list of decls in a fresh sub-env layered +;; on top of the parent. Returns a dict mapping each declared name to its +;; value. Used by `module M = struct DECLS end`. +(define ocaml-eval-module + (fn (decls parent-env) + (let ((env parent-env) (result {})) + (begin + (define run-decl + (fn (decl) + (let ((tag (ocaml-tag-of decl))) + (cond + ((= tag "def") + (let ((name (nth decl 1)) (params (nth decl 2)) (rhs (nth decl 3))) + (let ((v (if (= (len params) 0) + (ocaml-eval rhs env) + (ocaml-make-curried params rhs env)))) + (begin + (set! env (ocaml-env-extend env name v)) + (set! result (merge result (dict name v))))))) + ((= tag "def-rec") + (let ((name (nth decl 1)) (params (nth decl 2)) (rhs (nth decl 3))) + (let ((rhs-fn? + (or (> (len params) 0) + (= (ocaml-tag-of rhs) "fun") + (= (ocaml-tag-of rhs) "function")))) + (cond + (rhs-fn? + (let ((cell (list nil))) + (let ((env2 (ocaml-env-extend env name + (fn (arg) ((nth cell 0) arg))))) + (let ((v (if (= (len params) 0) + (ocaml-eval rhs env2) + (ocaml-make-curried params rhs env2)))) + (begin + (set-nth! cell 0 v) + (set! env env2) + (set! result (merge result (dict name v)))))))) + (else + (let ((v (ocaml-eval rhs env))) + (begin + (set! env (ocaml-env-extend env name v)) + (set! result (merge result (dict name v)))))))))) + ((= tag "expr") + (ocaml-eval (nth decl 1) env)) + ((= tag "module-def") + (let ((mname (nth decl 1)) (mdecls (nth decl 2))) + (let ((mod-val (ocaml-eval-module mdecls env))) + (begin + (set! env (ocaml-env-extend env mname mod-val)) + (set! result (merge result (dict mname mod-val))))))) + (else (error (str "ocaml-eval-module: bad decl " tag))))))) + (define loop + (fn (xs) + (when (not (= xs (list))) + (begin (run-decl (first xs)) (loop (rest xs)))))) + (loop decls) + result)))) + ;; ocaml-run — convenience wrapper: parse + eval. (define ocaml-run (fn (src) @@ -466,6 +541,16 @@ (set! last v)))))))) ((= tag "expr") (set! last (ocaml-eval (nth decl 1) env))) + ((= tag "module-def") + ;; module M = struct DECLS end — evaluate the inner + ;; decls in a fresh sub-env layered on the current + ;; one, then collect the new bindings into a dict that + ;; we bind under M. + (let ((mname (nth decl 1)) (mdecls (nth decl 2))) + (let ((mod-val (ocaml-eval-module mdecls env))) + (begin + (set! env (ocaml-env-extend env mname mod-val)) + (set! last mod-val))))) (else (error (str "ocaml-run-program: bad decl " tag))))))) (define loop (fn (xs) diff --git a/lib/ocaml/parser.sx b/lib/ocaml/parser.sx index d55d101e..30fb1263 100644 --- a/lib/ocaml/parser.sx +++ b/lib/ocaml/parser.sx @@ -377,12 +377,32 @@ true) ((and (= tt "op") (or (= tv "(") (= tv "["))) true) (else false))))) + (define parse-atom-postfix + (fn () + ;; After a primary atom, consume `.field` chains. Field name + ;; may be lower (record field, module value) or upper (module + ;; or constructor reference). Note: `M.x.y` is left-assoc: + ;; `(:field (:field M "x") "y")`. + (let ((head (parse-atom))) + (begin + (define loop + (fn () + (when (at-op? ".") + (begin + (advance-tok!) + (let ((tok (peek-tok))) + (begin + (advance-tok!) + (set! head (list :field head (ocaml-tok-value tok))) + (loop))))))) + (loop) + head)))) (set! parse-app (fn () (let - ((head (parse-atom))) + ((head (parse-atom-postfix))) (begin (define loop @@ -391,7 +411,7 @@ (when (at-app-start?) (let - ((arg (parse-atom))) + ((arg (parse-atom-postfix))) (begin (set! head (list :app head arg)) (loop)))))) (loop) head)))) @@ -767,6 +787,7 @@ ((= (ocaml-tok-type (peek-tok)) "eof") nil) ((at-op? ";;") nil) ((at-kw? "let") nil) + ((at-kw? "module") nil) (else (begin (advance-tok!) (skip-to-boundary!)))))) (define parse-decl-let @@ -818,6 +839,43 @@ (let ((expr-src (slice src expr-start (cur-pos)))) (let ((expr (ocaml-parse expr-src))) (list :expr expr))))))) + ;; module M = struct DECLS end + ;; Parsed by sub-tokenising the body source between `struct` and + ;; the matching `end`. Nested modules / sigs increment depth. + (define + parse-decl-module + (fn () + (advance-tok!) + (let ((name (ocaml-tok-value (consume! "ctor" nil)))) + (begin + (consume! "op" "=") + (consume! "keyword" "struct") + (let ((body-start (cur-pos)) (depth 1)) + (begin + (define skip + (fn () + (cond + ((>= idx tok-len) nil) + ((= (ocaml-tok-type (peek-tok)) "eof") nil) + ((at-kw? "struct") + (begin (set! depth (+ depth 1)) (advance-tok!) (skip))) + ((at-kw? "begin") + (begin (set! depth (+ depth 1)) (advance-tok!) (skip))) + ((at-kw? "sig") + (begin (set! depth (+ depth 1)) (advance-tok!) (skip))) + ((at-kw? "end") + (cond + ((= depth 1) nil) + (else + (begin (set! depth (- depth 1)) (advance-tok!) (skip))))) + (else (begin (advance-tok!) (skip)))))) + (skip) + (let ((body-end (cur-pos))) + (begin + (consume! "keyword" "end") + (let ((body-src (slice src body-start body-end))) + (let ((body-prog (ocaml-parse-program body-src))) + (list :module-def name (rest body-prog)))))))))))) (define loop (fn @@ -830,6 +888,8 @@ ((= (ocaml-tok-type (peek-tok)) "eof") nil) ((at-kw? "let") (begin (append! decls (parse-decl-let)) (loop))) + ((at-kw? "module") + (begin (append! decls (parse-decl-module)) (loop))) (else (begin (append! decls (parse-decl-expr)) (loop)))))))) (loop) (cons :program decls))))) diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index df1039aa..041709c8 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -531,6 +531,30 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 665) (eval "(ocaml-run \"let f x = if x < 0 then raise (NegArg x) else x * 2 in try f (-5) with | NegArg n -> n\")") +;; ── Phase 4: Modules + field access ──────────────────────────── +(epoch 700) +(eval "(ocaml-parse \"M.x\")") +(epoch 701) +(eval "(ocaml-parse \"r.field\")") +(epoch 702) +(eval "(ocaml-parse \"M.M2.x\")") +(epoch 703) +(eval "(ocaml-parse \"f r.x\")") +(epoch 710) +(eval "(ocaml-run-program \"module M = struct let x = 42 end ;; M.x\")") +(epoch 711) +(eval "(ocaml-run-program \"module M = struct let f x = x + 1 end ;; M.f 41\")") +(epoch 712) +(eval "(ocaml-run-program \"module M = struct let x = 1 let y = 2 end ;; M.x + M.y\")") +(epoch 713) +(eval "(ocaml-run-program \"module Math = struct let pi = 3.14 let square x = x * x end ;; Math.square 5\")") +(epoch 714) +(eval "(ocaml-run-program \"module Outer = struct module Inner = struct let v = 99 end end ;; Outer.Inner.v\")") +(epoch 715) +(eval "(ocaml-run-program \"module M = struct let rec fact n = if n = 0 then 1 else n * fact (n - 1) end ;; M.fact 5\")") +(epoch 716) +(eval "(ocaml-run-program \"module Pair = struct let make a b = (a, b) let swap p = match p with | (x, y) -> (y, x) end ;; Pair.swap (Pair.make 1 2)\")") + EPOCHS OUTPUT=$(timeout 60 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -846,6 +870,19 @@ check 663 "try failwith" '"oops"' check 664 "try sequence raises" '101' check 665 "raise from function" '-5' +# ── Phase 4: Modules + field access ───────────────────────────── +check 700 "parse M.x" '("field" ("con" "M") "x")' +check 701 "parse r.field" '("field" ("var" "r") "field")' +check 702 "parse M.M2.x left-assoc" '("field" ("field" ("con" "M") "M2") "x")' +check 703 "parse f r.x bind tighter" '("app" ("var" "f") ("field" ("var" "r") "x"))' +check 710 "module M.x = 42" '42' +check 711 "module M.f 41 = 42" '42' +check 712 "module two values" '3' +check 713 "module fn: square 5" '25' +check 714 "nested module Outer.Inner" '99' +check 715 "module rec fact 5" '120' +check 716 "module Pair.swap" '("tuple" 2 1)' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index e9609dd7..13fcd8c6 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -179,7 +179,8 @@ SX CEK evaluator (both JS and OCaml hosts) ### Phase 4 — Modules + functors -- [ ] `module M = struct let x = 1 let f y = x + y end` → SX dict `{:x 1 :f }`. +- [x] `module M = struct let x = 1 let f y = x + y end` → SX dict + `{"x" 1 "f" }`. - [ ] `module type S = sig val x : int val f : int -> int end` → interface record (runtime stub; typed checking in Phase 5). - [ ] `module M : S = struct ... end` — coercive sealing (runtime: pass-through). @@ -187,7 +188,7 @@ SX CEK evaluator (both JS and OCaml hosts) - [ ] `module F = Functor(Base)` — functor application. - [ ] `open M` — merge M's dict into current env (`env-merge`). - [ ] `include M` — same as open at structure level. -- [ ] `M.name` — dict get via `:name` key. +- [x] `M.name` — dict get via field access. - [ ] First-class modules (pack/unpack) — deferred to Phase 5. - [ ] Standard module hierarchy: `List`, `Option`, `Result`, `String`, `Char`, `Int`, `Float`, `Bool`, `Unit`, `Printf`, `Format` (stubs, filled in Phase 6). @@ -321,6 +322,18 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-08 Phase 4 — modules + field access (+11 tests, 215 total). Parser: + `module M = struct DECLS end` decl in `ocaml-parse-program`. Body parsed + by sub-tokenising the source between `struct` and the matching `end`, + tracking nesting via `struct`/`begin`/`sig`/`end`. Field access added + as a postfix layer above `parse-atom`, binding tighter than application: + `f r.x` → `(:app f (:field r "x"))`. Eval: `(:module-def NAME DECLS)` + builds a dict via new `ocaml-eval-module` that runs decls in a sub-env; + `(:field EXPR NAME)` looks up the field, with the special case that + `(:con NAME)` heads are interpreted as module-name lookups instead of + nullary ctors. Tested: simple module, multi-decl module, nested modules + (`Outer.Inner.v`), `let rec` inside a module, module containing tuple + pattern match. Phase 4 LOC: ~110 (well under 2000 budget). - 2026-05-08 Phase 2 — `try`/`with` + `raise` builtin. Parser produces `(:try EXPR CLAUSES)`; eval delegates to SX `guard` with `else` matching the raised value against clause patterns and re-raising on From d45e653a873d47cf2d1d7bebe27eaaa52021010a Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 08:39:13 +0000 Subject: [PATCH 013/298] ocaml: phase 4 open / include (+5 tests, 220 total) Parser: open Path and include Path top-level decls; Path is Ctor (.Ctor)*. Eval resolves via ocaml-resolve-module-path (same :con-as-module-lookup escape hatch used by :field). open extends the env with the module's bindings; include also merges into the surrounding module's exports (when inside a struct...end). Path resolver lets M.Sub.x work for nested modules. Phase 4 LOC ~165. --- lib/ocaml/eval.sx | 63 ++++++++++++++++++++++++++++++++++++++++++++ lib/ocaml/parser.sx | 29 ++++++++++++++++++++ lib/ocaml/test.sh | 19 +++++++++++++ plans/ocaml-on-sx.md | 16 +++++++++-- 4 files changed, 125 insertions(+), 2 deletions(-) diff --git a/lib/ocaml/eval.sx b/lib/ocaml/eval.sx index 7c373c2a..1066bc70 100644 --- a/lib/ocaml/eval.sx +++ b/lib/ocaml/eval.sx @@ -67,6 +67,42 @@ (fn (env name val) (cons (list name val) env))) +;; Resolve a module path like `M` or `M.Sub` to a dict. Mirrors the +;; field-access escape hatch where `(:con NAME)` is treated as an env +;; lookup rather than a nullary ctor. +(define ocaml-resolve-module-path + (fn (path-expr env) + (let ((tag (ocaml-tag-of path-expr))) + (cond + ((= tag "con") + (cond + ((ocaml-env-has? env (nth path-expr 1)) + (ocaml-env-lookup env (nth path-expr 1))) + (else (error (str "ocaml-eval: unknown module " (nth path-expr 1)))))) + ((= tag "field") + (let ((parent (ocaml-resolve-module-path (nth path-expr 1) env))) + (cond + ((dict? parent) (get parent (nth path-expr 2))) + (else (error + (str "ocaml-eval: not a module on path: " parent)))))) + (else (ocaml-eval path-expr env)))))) + +;; Merge a dict's bindings into an env (used by `open`/`include`). +;; Iterates keys; each (k, get d k) becomes a fresh env binding. +(define ocaml-env-merge-dict + (fn (env d) + (let ((result env) (ks (keys d))) + (begin + (define loop + (fn (xs) + (when (not (= xs (list))) + (let ((k (first xs))) + (begin + (set! result (cons (list k (get d k)) result)) + (loop (rest xs))))))) + (loop ks) + result)))) + (define ocaml-tag-of (fn (ast) (nth ast 0))) (define ocaml-eval (fn (ast env) nil)) @@ -484,6 +520,24 @@ (begin (set! env (ocaml-env-extend env mname mod-val)) (set! result (merge result (dict mname mod-val))))))) + ((= tag "open") + (let ((mod-val (ocaml-resolve-module-path (nth decl 1) env))) + (cond + ((dict? mod-val) + (set! env (ocaml-env-merge-dict env mod-val))) + (else (error + (str "ocaml-eval: open on non-module: " mod-val)))))) + ((= tag "include") + ;; `include M` brings M's bindings into scope AND into + ;; the surrounding module's exports. + (let ((mod-val (ocaml-resolve-module-path (nth decl 1) env))) + (cond + ((dict? mod-val) + (begin + (set! env (ocaml-env-merge-dict env mod-val)) + (set! result (merge result mod-val)))) + (else (error + (str "ocaml-eval: include on non-module: " mod-val)))))) (else (error (str "ocaml-eval-module: bad decl " tag))))))) (define loop (fn (xs) @@ -551,6 +605,15 @@ (begin (set! env (ocaml-env-extend env mname mod-val)) (set! last mod-val))))) + ((or (= tag "open") (= tag "include")) + ;; open M / include M — bring M's bindings into scope. + (let ((mod-val (ocaml-resolve-module-path (nth decl 1) env))) + (cond + ((dict? mod-val) + (begin + (set! env (ocaml-env-merge-dict env mod-val)) + (set! last mod-val))) + (else (error (str "ocaml-eval: open/include on non-module: " mod-val)))))) (else (error (str "ocaml-run-program: bad decl " tag))))))) (define loop (fn (xs) diff --git a/lib/ocaml/parser.sx b/lib/ocaml/parser.sx index 30fb1263..f905711d 100644 --- a/lib/ocaml/parser.sx +++ b/lib/ocaml/parser.sx @@ -788,6 +788,8 @@ ((at-op? ";;") nil) ((at-kw? "let") nil) ((at-kw? "module") nil) + ((at-kw? "open") nil) + ((at-kw? "include") nil) (else (begin (advance-tok!) (skip-to-boundary!)))))) (define parse-decl-let @@ -842,6 +844,29 @@ ;; module M = struct DECLS end ;; Parsed by sub-tokenising the body source between `struct` and ;; the matching `end`. Nested modules / sigs increment depth. + ;; open M / include M — collect a path Ctor(.SubCtor)* and emit + ;; (:open PATH) or (:include PATH). + (define + parse-decl-open + (fn (include?) + (advance-tok!) + (let ((path-start (cur-pos))) + (begin + ;; Walk until end of the path. A path is Ctor (. Ctor)*. + (define skip-path + (fn () + (cond + ((>= idx tok-len) nil) + ((= (ocaml-tok-type (peek-tok)) "ctor") + (begin (advance-tok!) (skip-path))) + ((at-op? ".") (begin (advance-tok!) (skip-path))) + (else nil)))) + (skip-path) + (let ((path-src (slice src path-start (cur-pos)))) + (let ((path-expr (ocaml-parse path-src))) + (if include? + (list :include path-expr) + (list :open path-expr)))))))) (define parse-decl-module (fn () @@ -890,6 +915,10 @@ (begin (append! decls (parse-decl-let)) (loop))) ((at-kw? "module") (begin (append! decls (parse-decl-module)) (loop))) + ((at-kw? "open") + (begin (append! decls (parse-decl-open false)) (loop))) + ((at-kw? "include") + (begin (append! decls (parse-decl-open true)) (loop))) (else (begin (append! decls (parse-decl-expr)) (loop)))))))) (loop) (cons :program decls))))) diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 041709c8..dff83522 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -555,6 +555,18 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 716) (eval "(ocaml-run-program \"module Pair = struct let make a b = (a, b) let swap p = match p with | (x, y) -> (y, x) end ;; Pair.swap (Pair.make 1 2)\")") +;; ── open / include ───────────────────────────────────────────── +(epoch 730) +(eval "(ocaml-run-program \"module M = struct let x = 42 let f y = y + 1 end ;; open M ;; f x\")") +(epoch 731) +(eval "(ocaml-run-program \"module Math = struct let pi = 3 let sq x = x * x end ;; module Sphere = struct include Math let area r = 4 * pi * sq r end ;; Sphere.area 2\")") +(epoch 732) +(eval "(ocaml-run-program \"module M = struct let x = 1 end ;; module N = struct open M let y = x + 10 end ;; N.y\")") +(epoch 733) +(eval "(ocaml-run-program \"module Math = struct let pi = 3 let sq x = x * x end ;; module Sphere = struct include Math let area r = 4 * pi * sq r end ;; Sphere.pi\")") +(epoch 734) +(eval "(ocaml-run-program \"module M = struct let x = 1 let y = 2 end ;; module N = struct include M let z = x + y end ;; N.z\")") + EPOCHS OUTPUT=$(timeout 60 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -883,6 +895,13 @@ check 714 "nested module Outer.Inner" '99' check 715 "module rec fact 5" '120' check 716 "module Pair.swap" '("tuple" 2 1)' +# ── open / include ────────────────────────────────────────────── +check 730 "open M; f x" '43' +check 731 "include Math; area" '48' +check 732 "module open inside" '11' +check 733 "Sphere.pi via include" '3' +check 734 "include M; N.z = x+y" '3' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 13fcd8c6..f6255ee3 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -186,8 +186,11 @@ SX CEK evaluator (both JS and OCaml hosts) - [ ] `module M : S = struct ... end` — coercive sealing (runtime: pass-through). - [ ] `functor (M : S) -> struct ... end` → SX `(fn (M) ...)`. - [ ] `module F = Functor(Base)` — functor application. -- [ ] `open M` — merge M's dict into current env (`env-merge`). -- [ ] `include M` — same as open at structure level. +- [x] `open M` — merge M's dict into current env (via + `ocaml-env-merge-dict`). Module path `M.Sub` resolves via + `ocaml-resolve-module-path`. +- [x] `include M` — at top level same as `open`; inside a module also + copies M's bindings into the surrounding module's exports. - [x] `M.name` — dict get via field access. - [ ] First-class modules (pack/unpack) — deferred to Phase 5. - [ ] Standard module hierarchy: `List`, `Option`, `Result`, `String`, `Char`, @@ -322,6 +325,15 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-08 Phase 4 — `open M` / `include M` (+5 tests, 220 total). + Parser: top-level `open Path` / `include Path` decls; path is `Ctor (. + Ctor)*`. Eval resolves the path via `ocaml-resolve-module-path` (the + same `:con`-as-module-lookup escape hatch used for `:field`); merges + the dict bindings into the current env via `ocaml-env-merge-dict`. + `include` inside a module also adds the bindings to the module's + resulting dict, so `module Sphere = struct include Math let area r = + ... end` exposes both Math's `pi` and Sphere's `area`. Phase 4 LOC + cumulative: ~165. - 2026-05-08 Phase 4 — modules + field access (+11 tests, 215 total). Parser: `module M = struct DECLS end` decl in `ocaml-parse-program`. Body parsed by sub-tokenising the source between `struct` and the matching `end`, From 5603ecc3a6bc74385ec659732ecb735bfa274a1b Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 08:44:54 +0000 Subject: [PATCH 014/298] ocaml: phase 4 functors + module aliases (+5 tests, 225 total) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Parser: module F (M) (N) ... = struct DECLS end -> (:functor-def NAME PARAMS DECLS). module N = expr (non-struct) -> (:module-alias NAME BODY-SRC). Functor params accept (P) or (P : Sig) — signatures parsed-and-skipped via skip-optional-sig. Eval: ocaml-make-functor builds curried host-SX closures from module dicts to a module dict. ocaml-resolve-module-path extended for :app so F(A), F(A)(B), and Outer.Inner all resolve to dicts. Phase 4 LOC ~290 cumulative (still well under 2000). --- lib/ocaml/eval.sx | 68 ++++++++++++++++++++++- lib/ocaml/parser.sx | 129 ++++++++++++++++++++++++++++++++++++------- lib/ocaml/test.sh | 19 +++++++ plans/ocaml-on-sx.md | 22 ++++++-- 4 files changed, 211 insertions(+), 27 deletions(-) diff --git a/lib/ocaml/eval.sx b/lib/ocaml/eval.sx index 1066bc70..873b391f 100644 --- a/lib/ocaml/eval.sx +++ b/lib/ocaml/eval.sx @@ -67,9 +67,11 @@ (fn (env name val) (cons (list name val) env))) -;; Resolve a module path like `M` or `M.Sub` to a dict. Mirrors the -;; field-access escape hatch where `(:con NAME)` is treated as an env -;; lookup rather than a nullary ctor. +;; Resolve a module path / functor-application expression to a module dict. +;; Mirrors the field-access escape hatch where `(:con NAME)` is treated as +;; an env lookup rather than a nullary ctor; also handles `(:app FN ARG)` +;; for functor applications, `(:field …)` for sub-modules, and `(:var …)` +;; for lower-case bindings. (define ocaml-resolve-module-path (fn (path-expr env) (let ((tag (ocaml-tag-of path-expr))) @@ -79,12 +81,22 @@ ((ocaml-env-has? env (nth path-expr 1)) (ocaml-env-lookup env (nth path-expr 1))) (else (error (str "ocaml-eval: unknown module " (nth path-expr 1)))))) + ((= tag "var") + (cond + ((ocaml-env-has? env (nth path-expr 1)) + (ocaml-env-lookup env (nth path-expr 1))) + (else (error (str "ocaml-eval: unknown module-var " (nth path-expr 1)))))) ((= tag "field") (let ((parent (ocaml-resolve-module-path (nth path-expr 1) env))) (cond ((dict? parent) (get parent (nth path-expr 2))) (else (error (str "ocaml-eval: not a module on path: " parent)))))) + ((= tag "app") + (let ((fn-val (ocaml-resolve-module-path (nth path-expr 1) env)) + (arg-val (ocaml-resolve-module-path (nth path-expr 2) env))) + (fn-val arg-val))) + ((= tag "unit") {}) (else (ocaml-eval path-expr env)))))) ;; Merge a dict's bindings into an env (used by `open`/`include`). @@ -470,6 +482,21 @@ (else (error (str "ocaml-eval: unknown AST tag " tag))))))) +;; ocaml-make-functor — build a curried host-SX closure that accepts +;; argument modules (one per param) and returns the resulting module dict +;; produced by evaluating the functor's body. +(define ocaml-make-functor + (fn (params decls captured-env) + (cond + ((= (len params) 1) + (fn (arg-mod) + (ocaml-eval-module decls + (ocaml-env-extend captured-env (first params) arg-mod)))) + (else + (fn (arg-mod) + (ocaml-make-functor (rest params) decls + (ocaml-env-extend captured-env (first params) arg-mod))))))) + ;; ocaml-eval-module — evaluate a list of decls in a fresh sub-env layered ;; on top of the parent. Returns a dict mapping each declared name to its ;; value. Used by `module M = struct DECLS end`. @@ -520,6 +547,21 @@ (begin (set! env (ocaml-env-extend env mname mod-val)) (set! result (merge result (dict mname mod-val))))))) + ((= tag "functor-def") + (let ((mname (nth decl 1)) + (mparams (nth decl 2)) + (mdecls (nth decl 3))) + (let ((fn-val (ocaml-make-functor mparams mdecls env))) + (begin + (set! env (ocaml-env-extend env mname fn-val)) + (set! result (merge result (dict mname fn-val))))))) + ((= tag "module-alias") + (let ((mname (nth decl 1)) (body-src (nth decl 2))) + (let ((body-expr (ocaml-parse body-src))) + (let ((mod-val (ocaml-resolve-module-path body-expr env))) + (begin + (set! env (ocaml-env-extend env mname mod-val)) + (set! result (merge result (dict mname mod-val)))))))) ((= tag "open") (let ((mod-val (ocaml-resolve-module-path (nth decl 1) env))) (cond @@ -605,6 +647,26 @@ (begin (set! env (ocaml-env-extend env mname mod-val)) (set! last mod-val))))) + ((= tag "functor-def") + ;; module F (M1) (M2) ... = struct DECLS end — bind F + ;; to a curried function from module dicts to a module + ;; dict. + (let ((mname (nth decl 1)) + (mparams (nth decl 2)) + (mdecls (nth decl 3))) + (let ((functor-val + (ocaml-make-functor mparams mdecls env))) + (begin + (set! env (ocaml-env-extend env mname functor-val)) + (set! last functor-val))))) + ((= tag "module-alias") + ;; module N = M / module N = F(A) / module N = M.Sub + (let ((mname (nth decl 1)) (body-src (nth decl 2))) + (let ((body-expr (ocaml-parse body-src))) + (let ((mod-val (ocaml-resolve-module-path body-expr env))) + (begin + (set! env (ocaml-env-extend env mname mod-val)) + (set! last mod-val)))))) ((or (= tag "open") (= tag "include")) ;; open M / include M — bring M's bindings into scope. (let ((mod-val (ocaml-resolve-module-path (nth decl 1) env))) diff --git a/lib/ocaml/parser.sx b/lib/ocaml/parser.sx index f905711d..cbe834d8 100644 --- a/lib/ocaml/parser.sx +++ b/lib/ocaml/parser.sx @@ -867,40 +867,131 @@ (if include? (list :include path-expr) (list :open path-expr)))))))) + ;; Parse a `struct DECLS end` body and return the decls list. (define - parse-decl-module + parse-struct-body (fn () - (advance-tok!) - (let ((name (ocaml-tok-value (consume! "ctor" nil)))) + (consume! "keyword" "struct") + (let ((body-start (cur-pos)) (depth 1)) (begin - (consume! "op" "=") - (consume! "keyword" "struct") - (let ((body-start (cur-pos)) (depth 1)) + (define skip + (fn () + (cond + ((>= idx tok-len) nil) + ((= (ocaml-tok-type (peek-tok)) "eof") nil) + ((at-kw? "struct") + (begin (set! depth (+ depth 1)) (advance-tok!) (skip))) + ((at-kw? "begin") + (begin (set! depth (+ depth 1)) (advance-tok!) (skip))) + ((at-kw? "sig") + (begin (set! depth (+ depth 1)) (advance-tok!) (skip))) + ((at-kw? "end") + (cond + ((= depth 1) nil) + (else + (begin (set! depth (- depth 1)) (advance-tok!) (skip))))) + (else (begin (advance-tok!) (skip)))))) + (skip) + (let ((body-end (cur-pos))) + (begin + (consume! "keyword" "end") + (let ((body-src (slice src body-start body-end))) + (let ((body-prog (ocaml-parse-program body-src))) + (rest body-prog))))))))) + + ;; Skip an optional `: Sig` constraint (parens-balanced; we + ;; ignore signatures in this iteration). + (define + skip-optional-sig + (fn () + (when (at-op? ":") + (begin + (advance-tok!) + (let ((depth 0)) (begin (define skip (fn () (cond ((>= idx tok-len) nil) ((= (ocaml-tok-type (peek-tok)) "eof") nil) - ((at-kw? "struct") - (begin (set! depth (+ depth 1)) (advance-tok!) (skip))) - ((at-kw? "begin") + ((and (= depth 0) (at-op? ")")) nil) + ((and (= depth 0) (at-op? "=")) nil) + ((at-op? "(") (begin (set! depth (+ depth 1)) (advance-tok!) (skip))) ((at-kw? "sig") (begin (set! depth (+ depth 1)) (advance-tok!) (skip))) + ((at-op? ")") + (begin (set! depth (- depth 1)) (advance-tok!) (skip))) ((at-kw? "end") - (cond - ((= depth 1) nil) - (else - (begin (set! depth (- depth 1)) (advance-tok!) (skip))))) + (begin (set! depth (- depth 1)) (advance-tok!) (skip))) (else (begin (advance-tok!) (skip)))))) - (skip) - (let ((body-end (cur-pos))) + (skip))))))) + + (define + parse-decl-module + (fn () + (advance-tok!) + (let ((name (ocaml-tok-value (consume! "ctor" nil))) + (params (list))) + (begin + ;; Functor parameters: `(P)` or `(P : Sig)`, repeated. + (define collect-params + (fn () + (when (at-op? "(") (begin - (consume! "keyword" "end") - (let ((body-src (slice src body-start body-end))) - (let ((body-prog (ocaml-parse-program body-src))) - (list :module-def name (rest body-prog)))))))))))) + (advance-tok!) + (when (= (ocaml-tok-type (peek-tok)) "ctor") + (begin + (append! params (ocaml-tok-value (peek-tok))) + (advance-tok!))) + (skip-optional-sig) + (consume! "op" ")") + (collect-params))))) + (collect-params) + (skip-optional-sig) + (consume! "op" "=") + (cond + ;; Body is `struct DECLS end` — possibly a functor body. + ((at-kw? "struct") + (let ((decls (parse-struct-body))) + (cond + ((= (len params) 0) (list :module-def name decls)) + (else (list :functor-def name params decls))))) + ;; Body is a path possibly applied: `M`, `M.Sub`, `F(A)`, `F(A)(B)`. + (else + (let ((body-start (cur-pos))) + (begin + (define skip-path-app + (fn () + (cond + ((>= idx tok-len) nil) + ((= (ocaml-tok-type (peek-tok)) "ctor") + (begin (advance-tok!) (skip-path-app))) + ((at-op? ".") + (begin (advance-tok!) (skip-path-app))) + ((at-op? "(") + ;; Paren-balanced argument list. + (let ((d 1)) + (begin + (advance-tok!) + (define skip-args + (fn () + (cond + ((>= idx tok-len) nil) + ((= (ocaml-tok-type (peek-tok)) "eof") nil) + ((at-op? "(") + (begin (set! d (+ d 1)) (advance-tok!) (skip-args))) + ((at-op? ")") + (cond + ((= d 1) (begin (advance-tok!) nil)) + (else (begin (set! d (- d 1)) (advance-tok!) (skip-args))))) + (else (begin (advance-tok!) (skip-args)))))) + (skip-args) + (skip-path-app)))) + (else nil)))) + (skip-path-app) + (let ((body-src (slice src body-start (cur-pos)))) + (list :module-alias name body-src)))))))))) (define loop (fn diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index dff83522..98937c07 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -567,6 +567,18 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 734) (eval "(ocaml-run-program \"module M = struct let x = 1 let y = 2 end ;; module N = struct include M let z = x + y end ;; N.z\")") +;; ── Functors ─────────────────────────────────────────────────── +(epoch 750) +(eval "(ocaml-run-program \"module Add (M) = struct let add x = x + M.n end ;; module Five = struct let n = 5 end ;; module AddFive = Add(Five) ;; AddFive.add 10\")") +(epoch 751) +(eval "(ocaml-run-program \"module M = struct let x = 1 end ;; module N = M ;; N.x\")") +(epoch 752) +(eval "(ocaml-run-program \"module Outer = struct module Inner = struct let v = 42 end end ;; module Alias = Outer.Inner ;; Alias.v\")") +(epoch 753) +(eval "(ocaml-run-program \"module Pair (A) (B) = struct let mk = (A.x, B.x) end ;; module One = struct let x = 1 end ;; module Two = struct let x = 2 end ;; module P = Pair(One)(Two) ;; P.mk\")") +(epoch 754) +(eval "(ocaml-run-program \"module Identity (M) = struct include M end ;; module Base = struct let v = 99 end ;; module Same = Identity(Base) ;; Same.v\")") + EPOCHS OUTPUT=$(timeout 60 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -902,6 +914,13 @@ check 732 "module open inside" '11' check 733 "Sphere.pi via include" '3' check 734 "include M; N.z = x+y" '3' +# ── Functors ──────────────────────────────────────────────────── +check 750 "functor app Add(Five).add 10" '15' +check 751 "module alias N = M" '1' +check 752 "submodule alias" '42' +check 753 "multi-param functor" '("tuple" 1 2)' +check 754 "Identity functor + include" '99' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index f6255ee3..8e428a17 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -181,11 +181,13 @@ SX CEK evaluator (both JS and OCaml hosts) - [x] `module M = struct let x = 1 let f y = x + y end` → SX dict `{"x" 1 "f" }`. -- [ ] `module type S = sig val x : int val f : int -> int end` → interface record - (runtime stub; typed checking in Phase 5). -- [ ] `module M : S = struct ... end` — coercive sealing (runtime: pass-through). -- [ ] `functor (M : S) -> struct ... end` → SX `(fn (M) ...)`. -- [ ] `module F = Functor(Base)` — functor application. +- [~] `module type S = sig val x : int val f : int -> int end` — signature + annotations are parsed-and-skipped (`skip-optional-sig`); typed + checking deferred to Phase 5. +- [x] `module M : S = struct ... end` — coercive sealing (signature ignored). +- [x] `functor (M : S) -> struct ... end` via shorthand `module F (M) = …`. +- [x] `module F = Functor(Base)` — functor application; multi-param via + `module P = F(A)(B)`. - [x] `open M` — merge M's dict into current env (via `ocaml-env-merge-dict`). Module path `M.Sub` resolves via `ocaml-resolve-module-path`. @@ -325,6 +327,16 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-08 Phase 4 — functors + module aliases (+5 tests, 225 total). + Parser: `module F (M) = struct DECLS end` → `(:functor-def NAME PARAMS + DECLS)`. `module N = expr` (where expr isn't `struct`) → `(:module-alias + NAME BODY-SRC)`. Functor params accept `(P)` or `(P : Sig)` (signatures + parsed-and-skipped). Eval: `ocaml-make-functor` builds a curried + host-SX closure that takes module dicts and returns a module dict; + `ocaml-resolve-module-path` extended for `:app` so `F(A)`, `F(A)(B)`, + `Outer.Inner` all resolve to dicts. Tested: 1-arg functor, 2-arg + curried `Pair(One)(Two)`, module alias, submodule alias, identity + functor with include. Phase 4 LOC ~290 (still well under 2000). - 2026-05-08 Phase 4 — `open M` / `include M` (+5 tests, 220 total). Parser: top-level `open Path` / `include Path` decls; path is `Ctor (. Ctor)*`. Eval resolves the path via `ocaml-resolve-module-path` (the From 19f1cad11dd532c53258af309493232f4ea4b427 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 08:49:44 +0000 Subject: [PATCH 015/298] ocaml: phase 6 stdlib slice (List/Option/Result, +23 tests, 248 total) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit lib/ocaml/runtime.sx defines the stdlib in OCaml syntax (not SX): every function exercises the parser, evaluator, match engine, and module machinery built in earlier phases. Loaded once via ocaml-load-stdlib!, cached in ocaml-stdlib-env, layered under user code via ocaml-base-env. List: length, rev, rev_append, map, filter, fold_left/right, append, iter, mem, for_all, exists, hd, tl, nth. Option: map, bind, value, get, is_none, is_some. Result: map, bind, is_ok, is_error. Substrate validation: this stdlib is a nontrivial OCaml program — its mere existence proves the substrate works. --- lib/ocaml/eval.sx | 18 ++++- lib/ocaml/runtime.sx | 173 +++++++++++++++++++++++++++++++++++++++++++ lib/ocaml/test.sh | 82 ++++++++++++++++++++ plans/ocaml-on-sx.md | 26 +++++-- 4 files changed, 288 insertions(+), 11 deletions(-) create mode 100644 lib/ocaml/runtime.sx diff --git a/lib/ocaml/eval.sx b/lib/ocaml/eval.sx index 873b391f..ddbfdd01 100644 --- a/lib/ocaml/eval.sx +++ b/lib/ocaml/eval.sx @@ -588,16 +588,28 @@ (loop decls) result)))) -;; ocaml-run — convenience wrapper: parse + eval. +;; ocaml-run — convenience wrapper: parse + eval. Layers the stdlib env +;; (List, Option, Result) underneath the empty env so user code can use +;; `List.map` etc. without explicit setup. +;; Variable guarded so eval.sx is loadable without runtime.sx. runtime.sx +;; sets ocaml-stdlib-env once loaded; before that, fall back to the empty +;; env so the existing tests continue to work without stdlib. +(define ocaml-stdlib-env nil) +(define ocaml-base-env + (fn () + (cond + ((not (= ocaml-stdlib-env nil)) ocaml-stdlib-env) + (else (ocaml-empty-env))))) + (define ocaml-run (fn (src) - (ocaml-eval (ocaml-parse src) (ocaml-empty-env)))) + (ocaml-eval (ocaml-parse src) (ocaml-base-env)))) ;; ocaml-run-program — evaluate a program (sequence of decls + bare exprs). ;; Threads an env through decls; returns the value of the last form. (define ocaml-run-program (fn (src) - (let ((prog (ocaml-parse-program src)) (env (ocaml-empty-env)) (last nil)) + (let ((prog (ocaml-parse-program src)) (env (ocaml-base-env)) (last nil)) (begin (define run-decl (fn (decl) diff --git a/lib/ocaml/runtime.sx b/lib/ocaml/runtime.sx new file mode 100644 index 00000000..47b8bbb5 --- /dev/null +++ b/lib/ocaml/runtime.sx @@ -0,0 +1,173 @@ +;; lib/ocaml/runtime.sx — minimal OCaml stdlib slice, written in OCaml. +;; +;; Defines List and Option modules with the most-used functions. Loaded +;; on demand via `(ocaml-load-stdlib! env)` from eval.sx, which parses +;; this source through `ocaml-parse-program` and evaluates each decl, +;; threading the env so stdlib bindings become available to user code. +;; +;; What's here is intentionally minimal — Phase 6 grows this into the +;; full ~150-function slice. Everything is defined in OCaml syntax (not +;; SX) on purpose, both as substrate validation and as documentation. + +(define ocaml-stdlib-src + "module List = struct + let rec length lst = + match lst with + | [] -> 0 + | _ :: t -> 1 + length t + + let rec rev_append xs acc = + match xs with + | [] -> acc + | h :: t -> rev_append t (h :: acc) + + let rev xs = rev_append xs [] + + let rec map f lst = + match lst with + | [] -> [] + | h :: t -> f h :: map f t + + let rec filter p lst = + match lst with + | [] -> [] + | h :: t -> if p h then h :: filter p t else filter p t + + let rec fold_left f init lst = + match lst with + | [] -> init + | h :: t -> fold_left f (f init h) t + + let rec fold_right f lst init = + match lst with + | [] -> init + | h :: t -> f h (fold_right f t init) + + let rec append xs ys = + match xs with + | [] -> ys + | h :: t -> h :: append t ys + + let rec iter f lst = + match lst with + | [] -> () + | h :: t -> f h; iter f t + + let rec mem x lst = + match lst with + | [] -> false + | h :: t -> if h = x then true else mem x t + + let rec for_all p lst = + match lst with + | [] -> true + | h :: t -> if p h then for_all p t else false + + let rec exists p lst = + match lst with + | [] -> false + | h :: t -> if p h then true else exists p t + + let hd lst = + match lst with + | [] -> failwith \"List.hd: empty\" + | h :: _ -> h + + let tl lst = + match lst with + | [] -> failwith \"List.tl: empty\" + | _ :: t -> t + + let rec nth lst n = + match lst with + | [] -> failwith \"List.nth: out of range\" + | h :: t -> if n = 0 then h else nth t (n - 1) + end ;; + + module Option = struct + let map f o = + match o with + | None -> None + | Some x -> Some (f x) + + let bind o f = + match o with + | None -> None + | Some x -> f x + + let value o default = + match o with + | None -> default + | Some x -> x + + let get o = + match o with + | None -> failwith \"Option.get: None\" + | Some x -> x + + let is_none o = + match o with + | None -> true + | Some _ -> false + + let is_some o = + match o with + | None -> false + | Some _ -> true + end ;; + + module Result = struct + let map f r = + match r with + | Ok x -> Ok (f x) + | Error e -> Error e + + let bind r f = + match r with + | Ok x -> f x + | Error e -> Error e + + let is_ok r = + match r with + | Ok _ -> true + | Error _ -> false + + let is_error r = + match r with + | Ok _ -> false + | Error _ -> true + end") + +(define ocaml-stdlib-loaded false) +(define ocaml-stdlib-env nil) + +;; Build a stdlib env once, cache it. ocaml-run / ocaml-run-program both +;; layer the user program on top of this base env. +(define ocaml-load-stdlib! + (fn () + (when (not ocaml-stdlib-loaded) + (let ((env (ocaml-empty-env))) + (begin + (define run-decl + (fn (decl) + (let ((tag (ocaml-tag-of decl))) + (cond + ((= tag "module-def") + (let ((mn (nth decl 1)) (ds (nth decl 2))) + (let ((mv (ocaml-eval-module ds env))) + (set! env (ocaml-env-extend env mn mv))))) + ((= tag "def") + (let ((nm (nth decl 1)) (ps (nth decl 2)) (rh (nth decl 3))) + (let ((v (if (= (len ps) 0) + (ocaml-eval rh env) + (ocaml-make-curried ps rh env)))) + (set! env (ocaml-env-extend env nm v))))))))) + (let ((prog (ocaml-parse-program ocaml-stdlib-src))) + (begin + (define loop + (fn (xs) + (when (not (= xs (list))) + (begin (run-decl (first xs)) (loop (rest xs)))))) + (loop (rest prog)) + (set! ocaml-stdlib-env env) + (set! ocaml-stdlib-loaded true)))))))) diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 98937c07..261f6dea 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -33,7 +33,9 @@ cat > "$TMPFILE" << 'EPOCHS' (load "lib/ocaml/tokenizer.sx") (load "lib/ocaml/parser.sx") (load "lib/ocaml/eval.sx") +(load "lib/ocaml/runtime.sx") (load "lib/ocaml/tests/tokenize.sx") +(eval "(ocaml-load-stdlib!)") ;; ── empty / eof ──────────────────────────────────────────────── (epoch 100) @@ -579,6 +581,56 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 754) (eval "(ocaml-run-program \"module Identity (M) = struct include M end ;; module Base = struct let v = 99 end ;; module Same = Identity(Base) ;; Same.v\")") +;; ── Phase 6: stdlib slice (List, Option, Result) ─────────────── +(epoch 800) +(eval "(ocaml-run \"List.length [1; 2; 3; 4]\")") +(epoch 801) +(eval "(ocaml-run \"List.length []\")") +(epoch 802) +(eval "(ocaml-run \"List.map (fun x -> x * 2) [1; 2; 3]\")") +(epoch 803) +(eval "(ocaml-run \"List.filter (fun x -> x > 2) [1; 2; 3; 4; 5]\")") +(epoch 804) +(eval "(ocaml-run \"List.fold_left (fun a b -> a + b) 0 [1; 2; 3; 4; 5]\")") +(epoch 805) +(eval "(ocaml-run \"List.fold_right (fun x acc -> x :: acc) [1; 2; 3] []\")") +(epoch 806) +(eval "(ocaml-run \"List.rev [1; 2; 3]\")") +(epoch 807) +(eval "(ocaml-run \"List.append [1; 2] [3; 4]\")") +(epoch 808) +(eval "(ocaml-run \"List.mem 3 [1; 2; 3]\")") +(epoch 809) +(eval "(ocaml-run \"List.mem 99 [1; 2; 3]\")") +(epoch 810) +(eval "(ocaml-run \"List.for_all (fun x -> x > 0) [1; 2; 3]\")") +(epoch 811) +(eval "(ocaml-run \"List.exists (fun x -> x > 2) [1; 2; 3]\")") +(epoch 812) +(eval "(ocaml-run \"List.hd [10; 20; 30]\")") +(epoch 813) +(eval "(ocaml-run \"List.nth [10; 20; 30] 1\")") + +(epoch 820) +(eval "(ocaml-run \"Option.map (fun x -> x + 1) (Some 41)\")") +(epoch 821) +(eval "(ocaml-run \"Option.map (fun x -> x + 1) None\")") +(epoch 822) +(eval "(ocaml-run \"Option.value (Some 7) 0\")") +(epoch 823) +(eval "(ocaml-run \"Option.value None 42\")") +(epoch 824) +(eval "(ocaml-run \"Option.is_some (Some 1)\")") +(epoch 825) +(eval "(ocaml-run \"Option.is_none None\")") + +(epoch 830) +(eval "(ocaml-run \"Result.map (fun x -> x + 1) (Ok 5)\")") +(epoch 831) +(eval "(ocaml-run \"Result.is_ok (Ok 1)\")") +(epoch 832) +(eval "(ocaml-run \"Result.is_error (Error \\\"oops\\\")\")") + EPOCHS OUTPUT=$(timeout 60 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -921,6 +973,36 @@ check 752 "submodule alias" '42' check 753 "multi-param functor" '("tuple" 1 2)' check 754 "Identity functor + include" '99' +# ── Phase 6: stdlib slice ─────────────────────────────────────── +# List +check 800 "List.length [1..4]" '4' +check 801 "List.length []" '0' +check 802 "List.map x*2 [1;2;3]" '(2 4 6)' +check 803 "List.filter > 2" '(3 4 5)' +check 804 "List.fold_left + 0 [1..5]" '15' +check 805 "List.fold_right ::" '(1 2 3)' +check 806 "List.rev" '(3 2 1)' +check 807 "List.append" '(1 2 3 4)' +check 808 "List.mem 3" 'true' +check 809 "List.mem 99" 'false' +check 810 "List.for_all >0" 'true' +check 811 "List.exists >2" 'true' +check 812 "List.hd" '10' +check 813 "List.nth idx 1" '20' + +# Option +check 820 "Option.map Some" '("Some" 42)' +check 821 "Option.map None" '("None")' +check 822 "Option.value Some" '7' +check 823 "Option.value None" '42' +check 824 "Option.is_some" 'true' +check 825 "Option.is_none" 'true' + +# Result +check 830 "Result.map Ok" '("Ok" 6)' +check 831 "Result.is_ok" 'true' +check 832 "Result.is_error" 'true' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 8e428a17..2d76261f 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -215,14 +215,14 @@ SX CEK evaluator (both JS and OCaml hosts) ### Phase 6 — Standard library -- [ ] `List`: `map`, `filter`, `fold_left`, `fold_right`, `length`, `rev`, `append`, - `concat`, `flatten`, `iter`, `iteri`, `mapi`, `for_all`, `exists`, `find`, - `find_opt`, `mem`, `assoc`, `assq`, `sort`, `stable_sort`, `nth`, `hd`, `tl`, - `init`, `combine`, `split`, `partition`. -- [ ] `Option`: `map`, `bind`, `fold`, `get`, `value`, `join`, `iter`, `to_list`, - `to_result`, `is_none`, `is_some`. -- [ ] `Result`: `map`, `bind`, `fold`, `get_ok`, `get_error`, `map_error`, - `to_option`, `is_ok`, `is_error`. +- [~] `List`: `map`, `filter`, `fold_left`, `fold_right`, `length`, `rev`, + `append`, `iter`, `for_all`, `exists`, `mem`, `nth`, `hd`, `tl`, + `rev_append`. _(Pending: concat/flatten, iteri/mapi, find/find_opt, + assoc/assq, sort, init, combine, split, partition.)_ +- [~] `Option`: `map`, `bind`, `value`, `get`, `is_none`, `is_some`. + _(Pending: fold/join/iter/to_list/to_result.)_ +- [~] `Result`: `map`, `bind`, `is_ok`, `is_error`. _(Pending: + fold/get_ok/get_error/map_error/to_option.)_ - [ ] `String`: `length`, `get`, `sub`, `concat`, `split_on_char`, `trim`, `uppercase_ascii`, `lowercase_ascii`, `contains`, `starts_with`, `ends_with`, `index_opt`, `replace_all` (non-stdlib but needed). @@ -327,6 +327,16 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-08 Phase 6 — `lib/ocaml/runtime.sx` minimal stdlib slice + written entirely in OCaml syntax: List (length, rev, rev_append, map, + filter, fold_left/right, append, iter, mem, for_all, exists, hd, tl, + nth), Option (map, bind, value, get, is_none, is_some), Result (map, + bind, is_ok, is_error). Loaded once via `ocaml-load-stdlib!`, cached + in `ocaml-stdlib-env`; `ocaml-run` and `ocaml-run-program` layer user + code on top via `ocaml-base-env`. The fact that these are written in + OCaml (not SX) and parse + evaluate cleanly is a substrate-validation + win: every parser, eval, match, ref, and module path proven by a + single nontrivial Ocaml program. 248/248 (+23). - 2026-05-08 Phase 4 — functors + module aliases (+5 tests, 225 total). Parser: `module F (M) = struct DECLS end` → `(:functor-def NAME PARAMS DECLS)`. `module N = expr` (where expr isn't `struct`) → `(:module-alias From 4c6790046c709839cfa4a3551a5c840fc8ce63a1 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 08:53:53 +0000 Subject: [PATCH 016/298] ocaml: phase 2 let..and.. mutual recursion (+3 tests, 251 total) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Parser collects multiple bindings via 'and', emitting (:def-rec-mut BINDINGS) for let-rec chains and (:def-mut BINDINGS) for non-rec. Single bindings keep the existing (:def …) / (:def-rec …) shapes. Eval (def-rec-mut): allocate placeholder cell per binding, build joint env where each name forwards through its cell, then evaluate each rhs against the joint env and fill the cells. Even/odd mutual-rec works. --- lib/ocaml/eval.sx | 54 ++++++++++++++++++++++++++++++++ lib/ocaml/parser.sx | 73 +++++++++++++++++++++++--------------------- lib/ocaml/test.sh | 13 ++++++++ plans/ocaml-on-sx.md | 12 ++++++-- 4 files changed, 116 insertions(+), 36 deletions(-) diff --git a/lib/ocaml/eval.sx b/lib/ocaml/eval.sx index ddbfdd01..3bb16651 100644 --- a/lib/ocaml/eval.sx +++ b/lib/ocaml/eval.sx @@ -647,6 +647,60 @@ (begin (set! env (ocaml-env-extend env name v)) (set! last v)))))))) + ((= tag "def-mut") + ;; let x = ... and y = ... — non-recursive; each rhs is + ;; evaluated in the parent env, then all names bind in + ;; sequence. + (let ((bs (nth decl 1))) + (begin + (define run-one + (fn (b) + (let ((nm (nth b 0)) (ps (nth b 1)) (rh (nth b 2))) + (let ((v (if (= (len ps) 0) + (ocaml-eval rh env) + (ocaml-make-curried ps rh env)))) + (begin + (set! env (ocaml-env-extend env nm v)) + (set! last v)))))) + (define loop + (fn (xs) + (when (not (= xs (list))) + (begin (run-one (first xs)) (loop (rest xs)))))) + (loop bs)))) + ((= tag "def-rec-mut") + ;; let rec f = ... and g = ... — mutually recursive; + ;; bind all names with placeholder cells first, then + ;; evaluate each rhs in the joint env, finally fill cells. + (let ((bs (nth decl 1)) (cells (list)) (env2 env)) + (begin + (define alloc + (fn (xs) + (when (not (= xs (list))) + (let ((b (first xs))) + (let ((c (list nil)) (nm (nth b 0))) + (begin + (append! cells c) + (set! env2 (ocaml-env-extend env2 nm + (fn (a) ((nth c 0) a)))) + (alloc (rest xs)))))))) + (alloc bs) + (let ((idx 0)) + (begin + (define fill + (fn (xs) + (when (not (= xs (list))) + (let ((b (first xs))) + (let ((nm (nth b 0)) (ps (nth b 1)) (rh (nth b 2))) + (let ((v (if (= (len ps) 0) + (ocaml-eval rh env2) + (ocaml-make-curried ps rh env2)))) + (begin + (set-nth! (nth cells idx) 0 v) + (set! idx (+ idx 1)) + (set! last v) + (fill (rest xs))))))))) + (fill bs) + (set! env env2)))))) ((= tag "expr") (set! last (ocaml-eval (nth decl 1) env))) ((= tag "module-def") diff --git a/lib/ocaml/parser.sx b/lib/ocaml/parser.sx index cbe834d8..2a5b0e4f 100644 --- a/lib/ocaml/parser.sx +++ b/lib/ocaml/parser.sx @@ -790,46 +790,51 @@ ((at-kw? "module") nil) ((at-kw? "open") nil) ((at-kw? "include") nil) + ((at-kw? "and") nil) (else (begin (advance-tok!) (skip-to-boundary!)))))) (define parse-decl-let - (fn - () + (fn () (advance-tok!) - (let - ((reccy false)) + (let ((reccy false) (bindings (list))) (begin - (when - (at-kw? "rec") - (begin (advance-tok!) (set! reccy true))) - (let - ((name (ocaml-tok-value (consume! "ident" nil))) - (params (list))) - (begin - (define - collect-params - (fn - () - (when - (check-tok? "ident" nil) - (begin - (append! params (ocaml-tok-value (peek-tok))) - (advance-tok!) - (collect-params))))) - (collect-params) - (consume! "op" "=") - (let - ((expr-start (cur-pos))) + (when (at-kw? "rec") (begin (advance-tok!) (set! reccy true))) + (define parse-one! + (fn () + (let ((nm (ocaml-tok-value (consume! "ident" nil))) + (ps (list))) (begin - (skip-to-boundary!) - (let - ((expr-src (slice src expr-start (cur-pos)))) - (let - ((expr (ocaml-parse expr-src))) - (if - reccy - (list :def-rec name params expr) - (list :def name params expr)))))))))))) + (define collect-params + (fn () + (when (check-tok? "ident" nil) + (begin + (append! ps (ocaml-tok-value (peek-tok))) + (advance-tok!) + (collect-params))))) + (collect-params) + (consume! "op" "=") + (let ((expr-start (cur-pos))) + (begin + (skip-to-boundary!) + (let ((expr-src (slice src expr-start (cur-pos)))) + (let ((expr (ocaml-parse expr-src))) + (append! bindings (list nm ps expr)))))))))) + (parse-one!) + (define more + (fn () + (when (at-kw? "and") + (begin (advance-tok!) (parse-one!) (more))))) + (more) + (cond + ((= (len bindings) 1) + (let ((b (first bindings))) + (if reccy + (list :def-rec (nth b 0) (nth b 1) (nth b 2)) + (list :def (nth b 0) (nth b 1) (nth b 2))))) + (else + (if reccy + (list :def-rec-mut bindings) + (list :def-mut bindings)))))))) (define parse-decl-expr (fn diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 261f6dea..0be261ea 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -631,6 +631,14 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 832) (eval "(ocaml-run \"Result.is_error (Error \\\"oops\\\")\")") +;; ── let ... and ... mutual recursion ────────────────────────── +(epoch 850) +(eval "(ocaml-run-program \"let rec even n = if n = 0 then true else odd (n - 1) and odd n = if n = 0 then false else even (n - 1);; even 10\")") +(epoch 851) +(eval "(ocaml-run-program \"let rec even n = if n = 0 then true else odd (n - 1) and odd n = if n = 0 then false else even (n - 1);; odd 7\")") +(epoch 852) +(eval "(ocaml-run-program \"let x = 1 and y = 2;; x + y\")") + EPOCHS OUTPUT=$(timeout 60 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1003,6 +1011,11 @@ check 830 "Result.map Ok" '("Ok" 6)' check 831 "Result.is_ok" 'true' check 832 "Result.is_error" 'true' +# ── let ... and ... mutual recursion ───────────────────────────── +check 850 "even 10 (mutual rec)" 'true' +check 851 "odd 7 (mutual rec)" 'true' +check 852 "let x = 1 and y = 2" '3' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 2d76261f..26694a4e 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -145,8 +145,11 @@ SX CEK evaluator (both JS and OCaml hosts) ### Phase 2 — Core evaluator (untyped) - [x] `ocaml-eval` entry: walks OCaml AST, produces SX values. -- [~] `let`/`let rec`/`let ... in` (single-binding done; mutually recursive - `and` deferred). +- [x] `let`/`let rec`/`let ... in`. Mutually recursive `let rec f = … and + g = …` works at top level via `(:def-rec-mut BINDINGS)`; placeholders + are bound first, rhs evaluated in the joint env, cells filled in. + `let x = … and y = …` (non-rec) emits `(:def-mut BINDINGS)` — + sequential bindings against the parent env. - [x] Lambda + application (curried by default — auto-curry multi-param defs). - [x] `fun`/`function` (single-arg lambda with immediate match on arg). - [x] `if`/`then`/`else`, `begin`/`end`, sequence `;`. @@ -327,6 +330,11 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-08 Phase 2 — `let ... and ...` mutual recursion at top level. + Parser collects all bindings into a list, emitting `(:def-rec-mut)` or + `(:def-mut)` when there are 2+. Eval allocates a placeholder cell per + recursive binding, builds an env with all of them visible, then fills + the cells. Even/odd mutual-recursion test passes. 251/251 (+3). - 2026-05-08 Phase 6 — `lib/ocaml/runtime.sx` minimal stdlib slice written entirely in OCaml syntax: List (length, rev, rev_append, map, filter, fold_left/right, append, iter, mem, for_all, exists, hd, tl, From 26863242a040344c5fa2e69c606ea3bbe5fc6d94 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 09:02:25 +0000 Subject: [PATCH 017/298] =?UTF-8?q?ocaml:=20phase=205=20HM=20type=20infere?= =?UTF-8?q?nce=20=E2=80=94=20closes=20lib-guest=20step=208=20(+14=20tests,?= =?UTF-8?q?=20265=20total)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OCaml-on-SX is the deferred second consumer for lib/guest/hm.sx step 8. lib/ocaml/infer.sx assembles Algorithm W on top of the shipped algebra: - Var: lookup + hm-instantiate. - Fun: fresh-tv per param, auto-curried via recursion. - App: unify against hm-arrow, fresh-tv for result. - Let: generalize rhs over (ftv(t) - ftv(env)) — let-polymorphism. - If: unify cond with Bool, both branches with each other. - Op (+, =, <, etc.): builtin signatures (int*int->int monomorphic, =/<> polymorphic 'a->'a->bool). Tests pass for: literals, fun x -> x : 'a -> 'a, let id ... id 5/id true, fun f x -> f (f x) : ('a -> 'a) -> 'a -> 'a (twice). Pending: tuples, lists, pattern matching, let-rec, modules in HM. --- lib/ocaml/infer.sx | 209 +++++++++++++++++++++++++++++++++++++++++++ lib/ocaml/test.sh | 49 ++++++++++ plans/ocaml-on-sx.md | 22 ++++- 3 files changed, 276 insertions(+), 4 deletions(-) create mode 100644 lib/ocaml/infer.sx diff --git a/lib/ocaml/infer.sx b/lib/ocaml/infer.sx new file mode 100644 index 00000000..86cd4b39 --- /dev/null +++ b/lib/ocaml/infer.sx @@ -0,0 +1,209 @@ +;; lib/ocaml/infer.sx — Algorithm W type inference for OCaml-on-SX. +;; +;; Consumes lib/guest/hm.sx (algebra) and lib/guest/match.sx (unify) per +;; the Phase 5 sequencing. The kit ships fresh-tv, generalize, +;; instantiate, and substitution composition; this file assembles the +;; lambda / app / let / if rules of Algorithm W against the OCaml AST. +;; +;; Coverage in this slice (atoms + core forms): +;; :int :float :string :char :bool :unit :var :fun :app :let :if +;; :op (with builtin signatures for +, -, *, /, mod, comparisons, &&, ||) +;; +;; Out of scope: pattern matching, tuples, lists (need product/list types +;; first), records, modules, ADTs, let-rec. +;; +;; Inference state: +;; env — dict: name → scheme +;; counter — one-element list (mutable cell) used by hm-fresh-tv +;; +;; Returned value: {:subst S :type T}. + +(define ocaml-hm-counter (fn () (list 0))) + +(define ocaml-hm-empty-subst (fn () {})) + +(define ocaml-hm-builtin-env + (fn () + (let ((int-int-int (hm-arrow (hm-int) (hm-arrow (hm-int) (hm-int)))) + (int-int-bool (hm-arrow (hm-int) (hm-arrow (hm-int) (hm-bool)))) + (bool-bool-bool (hm-arrow (hm-bool) (hm-arrow (hm-bool) (hm-bool)))) + (str-str-str (hm-arrow (hm-string) (hm-arrow (hm-string) (hm-string)))) + (any-any-bool + (let ((a (hm-tv "a"))) + (hm-scheme (list "a") + (hm-arrow a (hm-arrow a (hm-bool)))))) + (a->a + (let ((a (hm-tv "a"))) + (hm-scheme (list "a") (hm-arrow a a))))) + {"+" (hm-monotype int-int-int) + "-" (hm-monotype int-int-int) + "*" (hm-monotype int-int-int) + "/" (hm-monotype int-int-int) + "mod" (hm-monotype int-int-int) + "%" (hm-monotype int-int-int) + "**" (hm-monotype int-int-int) + "<" (hm-monotype int-int-bool) + ">" (hm-monotype int-int-bool) + "<=" (hm-monotype int-int-bool) + ">=" (hm-monotype int-int-bool) + "=" any-any-bool + "<>" any-any-bool + "&&" (hm-monotype bool-bool-bool) + "||" (hm-monotype bool-bool-bool) + "^" (hm-monotype str-str-str) + "not" (hm-monotype (hm-arrow (hm-bool) (hm-bool))) + "succ" (hm-monotype (hm-arrow (hm-int) (hm-int))) + "pred" (hm-monotype (hm-arrow (hm-int) (hm-int))) + "abs" (hm-monotype (hm-arrow (hm-int) (hm-int)))}))) + +(define ocaml-infer (fn (expr env counter) nil)) + +;; Unify two types; raise on failure. The match.sx unify returns nil on +;; failure so we wrap it for clearer errors. +(define ocaml-hm-unify + (fn (t1 t2 subst) + (let ((s2 (unify t1 t2 subst))) + (cond + ((= s2 nil) + (error (str "ocaml-infer: cannot unify " t1 " with " t2))) + (else s2))))) + +;; Look up name; instantiate scheme to a fresh monotype. +(define ocaml-infer-var + (fn (name env counter) + (cond + ((has-key? env name) + (let ((scheme (get env name))) + (let ((t (hm-instantiate scheme counter))) + {:subst {} :type t}))) + (else (error (str "ocaml-infer: unbound variable " name)))))) + +(define ocaml-infer-app + (fn (fn-expr arg-expr env counter) + (let ((r1 (ocaml-infer fn-expr env counter))) + (let ((s1 (get r1 :subst)) (t1 (get r1 :type))) + (let ((env2 (hm-apply-env s1 env))) + (let ((r2 (ocaml-infer arg-expr env2 counter))) + (let ((s2 (get r2 :subst)) (t2 (get r2 :type))) + (let ((tv (hm-fresh-tv counter))) + (let ((s3 (ocaml-hm-unify + (hm-apply s2 t1) + (hm-arrow t2 tv) + (hm-compose s2 s1)))) + {:subst s3 :type (hm-apply s3 tv)}))))))))) + +(define ocaml-infer-fun + (fn (params body env counter) + (cond + ((= (len params) 0) + (error "ocaml-infer: fun without params")) + ((= (len params) 1) + (let ((tv (hm-fresh-tv counter))) + (let ((env2 (assoc env (first params) (hm-monotype tv)))) + (let ((r (ocaml-infer body env2 counter))) + (let ((s (get r :subst)) (t-body (get r :type))) + {:subst s + :type (hm-arrow (hm-apply s tv) t-body)}))))) + (else + ;; Curry: fun x y -> e ≡ fun x -> fun y -> e + (let ((tv (hm-fresh-tv counter))) + (let ((env2 (assoc env (first params) (hm-monotype tv)))) + (let ((r (ocaml-infer-fun (rest params) body env2 counter))) + (let ((s (get r :subst)) (t-rest (get r :type))) + {:subst s + :type (hm-arrow (hm-apply s tv) t-rest)})))))))) + +(define ocaml-infer-let + (fn (name params rhs body env counter) + (let ((rhs-expr (cond + ((= (len params) 0) rhs) + (else (list :fun params rhs))))) + (let ((r1 (ocaml-infer rhs-expr env counter))) + (let ((s1 (get r1 :subst)) (t1 (get r1 :type))) + (let ((env2 (hm-apply-env s1 env))) + (let ((scheme (hm-generalize t1 env2))) + (let ((env3 (assoc env2 name scheme))) + (let ((r2 (ocaml-infer body env3 counter))) + (let ((s2 (get r2 :subst)) (t2 (get r2 :type))) + {:subst (hm-compose s2 s1) :type t2})))))))))) + +(define ocaml-infer-if + (fn (c-ast t-ast e-ast env counter) + (let ((rc (ocaml-infer c-ast env counter))) + (let ((sc (get rc :subst)) (tc (get rc :type))) + (let ((sc2 (ocaml-hm-unify tc (hm-bool) sc))) + (let ((env2 (hm-apply-env sc2 env))) + (let ((rt (ocaml-infer t-ast env2 counter))) + (let ((st (get rt :subst)) (tt (get rt :type))) + (let ((env3 (hm-apply-env st env2))) + (let ((re (ocaml-infer e-ast env3 counter))) + (let ((se (get re :subst)) (te (get re :type))) + (let ((sf (ocaml-hm-unify + (hm-apply se tt) + te + (hm-compose se (hm-compose st sc2))))) + {:subst sf + :type (hm-apply sf te)})))))))))))) + +(set! ocaml-infer + (fn (expr env counter) + (let ((tag (nth expr 0))) + (cond + ((= tag "int") {:subst {} :type (hm-int)}) + ((= tag "float") {:subst {} :type (hm-int)}) ;; treat float as int for now + ((= tag "string") {:subst {} :type (hm-string)}) + ((= tag "char") {:subst {} :type (hm-string)}) + ((= tag "bool") {:subst {} :type (hm-bool)}) + ((= tag "unit") {:subst {} :type (hm-con "Unit" (list))}) + ((= tag "var") (ocaml-infer-var (nth expr 1) env counter)) + ((= tag "fun") (ocaml-infer-fun (nth expr 1) (nth expr 2) env counter)) + ((= tag "app") (ocaml-infer-app (nth expr 1) (nth expr 2) env counter)) + ((= tag "let") (ocaml-infer-let (nth expr 1) (nth expr 2) + (nth expr 3) (nth expr 4) env counter)) + ((= tag "if") (ocaml-infer-if (nth expr 1) (nth expr 2) + (nth expr 3) env counter)) + ((= tag "neg") + (let ((r (ocaml-infer (nth expr 1) env counter))) + (let ((s (get r :subst)) (t (get r :type))) + (let ((s2 (ocaml-hm-unify t (hm-int) s))) + {:subst s2 :type (hm-int)})))) + ((= tag "not") + (let ((r (ocaml-infer (nth expr 1) env counter))) + (let ((s (get r :subst)) (t (get r :type))) + (let ((s2 (ocaml-hm-unify t (hm-bool) s))) + {:subst s2 :type (hm-bool)})))) + ((= tag "op") + ;; Treat (:op OP L R) as (:app (:app (:var OP) L) R) — same rule. + (ocaml-infer + (list :app (list :app (list :var (nth expr 1)) (nth expr 2)) (nth expr 3)) + env counter)) + (else (error (str "ocaml-infer: unsupported tag " tag))))))) + +;; Top-level convenience: parse + infer + render the type. +(define ocaml-type-of + (fn (src) + (let ((expr (ocaml-parse src)) + (env (ocaml-hm-builtin-env)) + (counter (ocaml-hm-counter))) + (let ((r (ocaml-infer expr env counter))) + (ocaml-hm-format-type (hm-apply (get r :subst) (get r :type))))))) + +;; Pretty-print a type as an OCaml-style string for testing. Only handles +;; the constructors we use: Int / Bool / String / Unit / -> / type-vars. +(define ocaml-hm-format-type + (fn (t) + (cond + ((is-var? t) (str "'" (var-name t))) + ((is-ctor? t) + (let ((head (ctor-head t)) (args (ctor-args t))) + (cond + ((= head "->") + (let ((a (nth args 0)) (b (nth args 1))) + (str + (cond + ((and (is-ctor? a) (= (ctor-head a) "->")) + (str "(" (ocaml-hm-format-type a) ")")) + (else (ocaml-hm-format-type a))) + " -> " (ocaml-hm-format-type b)))) + (else head)))) + (else (str t))))) diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 0be261ea..0b8da7cf 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -30,10 +30,13 @@ cat > "$TMPFILE" << 'EPOCHS' (load "lib/guest/lex.sx") (load "lib/guest/prefix.sx") (load "lib/guest/pratt.sx") +(load "lib/guest/match.sx") +(load "lib/guest/hm.sx") (load "lib/ocaml/tokenizer.sx") (load "lib/ocaml/parser.sx") (load "lib/ocaml/eval.sx") (load "lib/ocaml/runtime.sx") +(load "lib/ocaml/infer.sx") (load "lib/ocaml/tests/tokenize.sx") (eval "(ocaml-load-stdlib!)") @@ -639,6 +642,36 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 852) (eval "(ocaml-run-program \"let x = 1 and y = 2;; x + y\")") +;; ── Phase 5: Hindley-Milner type inference ──────────────────── +(epoch 900) +(eval "(ocaml-type-of \"42\")") +(epoch 901) +(eval "(ocaml-type-of \"true\")") +(epoch 902) +(eval "(ocaml-type-of \"\\\"hi\\\"\")") +(epoch 903) +(eval "(ocaml-type-of \"1 + 2\")") +(epoch 904) +(eval "(ocaml-type-of \"fun x -> x + 1\")") +(epoch 905) +(eval "(ocaml-type-of \"fun x -> x\")") +(epoch 906) +(eval "(ocaml-type-of \"fun x y -> x + y\")") +(epoch 907) +(eval "(ocaml-type-of \"let f x = x + 1 in f 10\")") +(epoch 908) +(eval "(ocaml-type-of \"let id = fun x -> x in id 5\")") +(epoch 909) +(eval "(ocaml-type-of \"let id = fun x -> x in id true\")") +(epoch 910) +(eval "(ocaml-type-of \"if true then 1 else 2\")") +(epoch 911) +(eval "(ocaml-type-of \"fun f -> fun x -> f (f x)\")") +(epoch 912) +(eval "(ocaml-type-of \"fun b -> if b then 1 else 0\")") +(epoch 913) +(eval "(ocaml-type-of \"not true\")") + EPOCHS OUTPUT=$(timeout 60 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1016,6 +1049,22 @@ check 850 "even 10 (mutual rec)" 'true' check 851 "odd 7 (mutual rec)" 'true' check 852 "let x = 1 and y = 2" '3' +# ── Phase 5: Hindley-Milner type inference ──────────────────── +check 900 "type 42 = Int" '"Int"' +check 901 "type true = Bool" '"Bool"' +check 902 'type string lit' '"String"' +check 903 "type 1+2 = Int" '"Int"' +check 904 "type fun x->x+1 = Int->Int" '"Int -> Int"' +check 905 "type fun x->x = poly" ' -> ' +check 906 "type fun x y->x+y" '"Int -> Int -> Int"' +check 907 "type let f x=x+1 in f 10" '"Int"' +check 908 "type let id; id 5" '"Int"' +check 909 "type let id; id true" '"Bool"' +check 910 "type if/then/else" '"Int"' +check 911 "type twice" ' -> ' +check 912 "type bool branch" '"Bool -> Int"' +check 913 "type not true" '"Bool"' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 26694a4e..06e288d5 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -204,11 +204,14 @@ SX CEK evaluator (both JS and OCaml hosts) ### Phase 5 — Hindley-Milner type inference -- [ ] Algorithm W: `gen`/`inst`, `unify`, `infer-expr`, `infer-decl`. -- [ ] Type variables: `'a`, `'b`; unification with occur-check. -- [ ] Let-polymorphism: generalise at let-bindings. +- [~] Algorithm W: `gen`/`inst` from `lib/guest/hm.sx`, `unify` from + `lib/guest/match.sx`, `infer-expr` written here. Covers atoms, var, + lambda, app, let, if, op, neg, not. _(Pending: tuples, lists, + pattern matching, let-rec, modules.)_ +- [x] Type variables: `'a`, `'b`; unification with occur-check (kit). +- [x] Let-polymorphism: generalise at let-bindings (kit `hm-generalize`). - [ ] ADT types: `type 'a option = None | Some of 'a`. -- [ ] Function types, tuple types, record types. +- [~] Function types `T1 -> T2` work; tuples/records pending. - [ ] Type signatures: `val f : int -> int` — verify against inferred type. - [ ] Module type checking: seal against `sig` (Phase 4 stubs become real checks). - [ ] Error reporting: position-tagged errors with expected vs actual types. @@ -330,6 +333,17 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-08 Phase 5 — Hindley-Milner type inference, paired-sequencing + consumer of `lib/guest/hm.sx` (algebra) and `lib/guest/match.sx` + (unify). `lib/ocaml/infer.sx` ships Algorithm W rules for OCaml AST: + atoms, var (instantiate), fun (auto-curry through fresh-tv), app + (unify against arrow), let (generalize over rhs), if (unify branches), + neg/not, op (treat as app of builtin). Builtin env types `+`/`-`/etc. + as monomorphic int->int->int and `=`/`<>` as polymorphic 'a->'a->bool. + Tested: literals, +1, identity polymorphism `'a -> 'a`, let-poly so + `let id = fun x -> x in id true : Bool`, `twice` infers + `('a -> 'a) -> 'a -> 'a`. Mandate satisfied: OCaml-on-SX is the + deferred second consumer for lib-guest Step 8. 265/265 (+14). - 2026-05-08 Phase 2 — `let ... and ...` mutual recursion at top level. Parser collects all bindings into a list, emitting `(:def-rec-mut)` or `(:def-mut)` when there are 2+. Eval allocates a placeholder cell per From c8bfd22786aa7b2cfdd1e25fbb9bf0d7cd7e56f2 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 09:10:06 +0000 Subject: [PATCH 018/298] ocaml: phase 6 String/Char/Int/Float/Printf modules (+13 tests, 278 total) Host primitives _string_length / _string_sub / _char_code / etc. exposed in the base env (underscore-prefixed to avoid user clash). lib/ocaml/ runtime.sx wraps them into OCaml-syntax modules: String (length, get, sub, concat, uppercase/lowercase_ascii, starts_with), Char (code, chr, lowercase/uppercase_ascii), Int (to_string, of_string, abs, max, min), Float.to_string, Printf stubs. Also added print_string / print_endline / print_int IO builtins. --- lib/ocaml/eval.sx | 21 +++++++++++++++++++- lib/ocaml/runtime.sx | 34 ++++++++++++++++++++++++++++++++ lib/ocaml/test.sh | 47 ++++++++++++++++++++++++++++++++++++++++++++ plans/ocaml-on-sx.md | 18 +++++++++++++++++ 4 files changed, 119 insertions(+), 1 deletion(-) diff --git a/lib/ocaml/eval.sx b/lib/ocaml/eval.sx index 3bb16651..ab67a841 100644 --- a/lib/ocaml/eval.sx +++ b/lib/ocaml/eval.sx @@ -47,7 +47,26 @@ ;; can pattern-match them. (list "raise" (fn (e) (raise e))) (list "failwith" (fn (msg) (raise (list "Failure" msg)))) - (list "invalid_arg" (fn (msg) (raise (list "Invalid_argument" msg))))))) + (list "invalid_arg" (fn (msg) (raise (list "Invalid_argument" msg))) + ) + ;; Host primitives exposed for the OCaml stdlib (lib/ocaml/runtime.sx). + ;; Underscore-prefixed to avoid clashing with user names. + (list "_string_length" (fn (s) (len s))) + (list "_string_get" (fn (s) (fn (i) (nth s i)))) + (list "_string_sub" (fn (s) (fn (i) (fn (n) (slice s i (+ i n)))))) + (list "_string_concat" (fn (sep) (fn (xs) (join sep xs)))) + (list "_string_upper" (fn (s) (upper s))) + (list "_string_lower" (fn (s) (lower s))) + (list "_string_starts_with" (fn (p) (fn (s) (starts-with? s p)))) + (list "_int_of_string" (fn (s) (parse-number s))) + (list "_string_of_int" (fn (i) (str i))) + (list "_string_of_float" (fn (f) (str f))) + (list "_char_code" (fn (c) (char-code c))) + (list "_char_chr" (fn (n) (char-from-code n))) + ;; Print: prints to host stdout via println. + (list "print_string" (fn (s) (begin (print s) nil))) + (list "print_endline" (fn (s) (begin (println s) nil))) + (list "print_int" (fn (i) (begin (print (str i)) nil)))))) (define ocaml-env-lookup (fn (env name) diff --git a/lib/ocaml/runtime.sx b/lib/ocaml/runtime.sx index 47b8bbb5..52ea6cf1 100644 --- a/lib/ocaml/runtime.sx +++ b/lib/ocaml/runtime.sx @@ -136,6 +136,40 @@ match r with | Ok _ -> false | Error _ -> true + end ;; + + module String = struct + let length s = _string_length s + let get s i = _string_get s i + let sub s i n = _string_sub s i n + let concat sep xs = _string_concat sep xs + let uppercase_ascii s = _string_upper s + let lowercase_ascii s = _string_lower s + let starts_with prefix s = _string_starts_with prefix s + end ;; + + module Char = struct + let code c = _char_code c + let chr n = _char_chr n + let lowercase_ascii c = _string_lower c + let uppercase_ascii c = _string_upper c + end ;; + + module Int = struct + let to_string i = _string_of_int i + let of_string s = _int_of_string s + let abs n = if n < 0 then 0 - n else n + let max a b = if a > b then a else b + let min a b = if a < b then a else b + end ;; + + module Float = struct + let to_string f = _string_of_float f + end ;; + + module Printf = struct + let sprintf fmt = fmt + let printf fmt = print_string fmt end") (define ocaml-stdlib-loaded false) diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 0b8da7cf..5b87ac87 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -672,6 +672,36 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 913) (eval "(ocaml-type-of \"not true\")") +;; ── Phase 6 expanded: String / Char / Int / Float modules ───── +(epoch 950) +(eval "(ocaml-run \"String.length \\\"hello\\\"\")") +(epoch 951) +(eval "(ocaml-run \"String.uppercase_ascii \\\"hi\\\"\")") +(epoch 952) +(eval "(ocaml-run \"String.lowercase_ascii \\\"HI\\\"\")") +(epoch 953) +(eval "(ocaml-run \"String.sub \\\"hello\\\" 1 3\")") +(epoch 954) +(eval "(ocaml-run \"String.starts_with \\\"he\\\" \\\"hello\\\"\")") +(epoch 955) +(eval "(ocaml-run \"String.concat \\\",\\\" [\\\"a\\\"; \\\"b\\\"; \\\"c\\\"]\")") + +(epoch 960) +(eval "(ocaml-run \"Char.code \\\"A\\\"\")") +(epoch 961) +(eval "(ocaml-run \"Char.chr 65\")") + +(epoch 970) +(eval "(ocaml-run \"Int.to_string 42\")") +(epoch 971) +(eval "(ocaml-run \"Int.of_string \\\"123\\\"\")") +(epoch 972) +(eval "(ocaml-run \"Int.abs (-5)\")") +(epoch 973) +(eval "(ocaml-run \"Int.max 7 3\")") +(epoch 974) +(eval "(ocaml-run \"Int.min 7 3\")") + EPOCHS OUTPUT=$(timeout 60 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1065,6 +1095,23 @@ check 911 "type twice" ' -> ' check 912 "type bool branch" '"Bool -> Int"' check 913 "type not true" '"Bool"' +# ── Phase 6 String / Char / Int ───────────────────────────────── +check 950 "String.length" '5' +check 951 "String.uppercase_ascii" '"HI"' +check 952 "String.lowercase_ascii" '"hi"' +check 953 "String.sub" '"ell"' +check 954 "String.starts_with" 'true' +check 955 "String.concat" '"a,b,c"' + +check 960 "Char.code A" '65' +check 961 "Char.chr 65" '"A"' + +check 970 "Int.to_string" '"42"' +check 971 "Int.of_string" '123' +check 972 "Int.abs -5" '5' +check 973 "Int.max" '7' +check 974 "Int.min" '3' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 06e288d5..a7b756d1 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -229,6 +229,16 @@ SX CEK evaluator (both JS and OCaml hosts) _(Pending: fold/join/iter/to_list/to_result.)_ - [~] `Result`: `map`, `bind`, `is_ok`, `is_error`. _(Pending: fold/get_ok/get_error/map_error/to_option.)_ +- [~] `String`: `length`, `get`, `sub`, `concat`, `uppercase_ascii`, + `lowercase_ascii`, `starts_with`. _(Pending: split_on_char, trim, + contains, ends_with, index_opt, replace_all.)_ +- [~] `Char`: `code`, `chr`, `lowercase_ascii`, `uppercase_ascii`. + _(Pending: escaped.)_ +- [~] `Int`: `to_string`, `of_string`, `abs`, `max`, `min`. + _(Pending: arithmetic helpers, min_int/max_int.)_ +- [~] `Float`: `to_string`. _(Pending: of_string, arithmetic helpers.)_ +- [~] `Printf`: stub `sprintf`/`printf`. _(Real format-string + interpretation pending.)_ - [ ] `String`: `length`, `get`, `sub`, `concat`, `split_on_char`, `trim`, `uppercase_ascii`, `lowercase_ascii`, `contains`, `starts_with`, `ends_with`, `index_opt`, `replace_all` (non-stdlib but needed). @@ -333,6 +343,14 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-08 Phase 6 — extended stdlib slice (+13 tests, 278 total). + Host primitives exposed via `_string_*`, `_char_*`, `_int_*`, + `_string_of_*` underscore-prefixed builtins so the OCaml-side + `lib/ocaml/runtime.sx` modules can wrap them: String (length, get, + sub, concat, uppercase_ascii, lowercase_ascii, starts_with), Char + (code, chr, lowercase_ascii, uppercase_ascii), Int (to_string, + of_string, abs, max, min), Float.to_string, Printf stubs. Also added + `print_string`/`print_endline`/`print_int` builtins. - 2026-05-08 Phase 5 — Hindley-Milner type inference, paired-sequencing consumer of `lib/guest/hm.sx` (algebra) and `lib/guest/match.sx` (unify). `lib/ocaml/infer.sx` ships Algorithm W rules for OCaml AST: From 74b80e6b0e8eb8b704e8fcb6825371ce5ad4201a Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 09:21:06 +0000 Subject: [PATCH 019/298] ocaml: phase 1 unit/wildcard params + 180s timeout (+5 tests, 283 total) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Parser: try-consume-param! handles ident, wildcard _ (fresh __wild_N name), unit () (fresh __unit_N), typed (x : T) (skips signature). parse-fun and parse-let (inline) reuse the helper; top-level parse-decl-let inlines a similar test. test.sh timeout bumped from 60s to 180s — the growing suite was hitting the cap and reporting spurious failures. --- lib/ocaml/parser.sx | 95 ++++++++++++++++++++++++++++++++++---------- lib/ocaml/test.sh | 21 +++++++++- plans/ocaml-on-sx.md | 6 +++ 3 files changed, 100 insertions(+), 22 deletions(-) diff --git a/lib/ocaml/parser.sx b/lib/ocaml/parser.sx index 2a5b0e4f..fc72ffdd 100644 --- a/lib/ocaml/parser.sx +++ b/lib/ocaml/parser.sx @@ -282,6 +282,56 @@ (list :pcons lhs (parse-pattern-cons)))) (else lhs))))) (set! parse-pattern (fn () (parse-pattern-cons))) + + (define peek-tok-at + (fn (n) + (if (< (+ idx n) tok-len) (nth tokens (+ idx n)) nil))) + + ;; Param consumption — matches ident, `_` (wildcard), or `()` + ;; (unit). Returns a fresh ident name or nil if no param at cursor. + (define wild-counter (list 0)) + (define try-consume-param! + (fn () + (cond + ((and (check-tok? "ident" nil) + (= (ocaml-tok-value (peek-tok)) "_")) + (begin + (advance-tok!) + (set-nth! wild-counter 0 (+ (nth wild-counter 0) 1)) + (str "__wild_" (nth wild-counter 0)))) + ((check-tok? "ident" nil) + (let ((nm (ocaml-tok-value (peek-tok)))) + (begin (advance-tok!) nm))) + ((and (at-op? "(") (= (ocaml-tok-value (peek-tok-at 1)) ")")) + (begin + (advance-tok!) (advance-tok!) + (set-nth! wild-counter 0 (+ (nth wild-counter 0) 1)) + (str "__unit_" (nth wild-counter 0)))) + ((and (at-op? "(") (= (ocaml-tok-type (peek-tok-at 1)) "ident")) + ;; (x : T) — typed param. Skip the `: T` part. + (let ((nm (ocaml-tok-value (peek-tok-at 1)))) + (begin + (advance-tok!) (advance-tok!) + (when (at-op? ":") + (begin + ;; Skip until matching `)`. + (let ((d 1)) + (begin + (define skip + (fn () + (cond + ((>= idx tok-len) nil) + ((at-op? "(") + (begin (set! d (+ d 1)) (advance-tok!) (skip))) + ((at-op? ")") + (cond + ((= d 1) nil) + (else (begin (set! d (- d 1)) (advance-tok!) (skip))))) + (else (begin (advance-tok!) (skip)))))) + (skip))))) + (consume! "op" ")") + nm))) + (else nil)))) (define parse-expr (fn () nil)) (define parse-expr-no-seq (fn () nil)) (define parse-tuple (fn () nil)) @@ -492,14 +542,10 @@ (begin (define collect-params - (fn - () - (when - (check-tok? "ident" nil) - (begin - (append! params (ocaml-tok-value (peek-tok))) - (advance-tok!) - (collect-params))))) + (fn () + (let ((nm (try-consume-param!))) + (when (not (= nm nil)) + (begin (append! params nm) (collect-params)))))) (collect-params) (when (= (len params) 0) @@ -522,14 +568,10 @@ (begin (define collect-params - (fn - () - (when - (check-tok? "ident" nil) - (begin - (append! params (ocaml-tok-value (peek-tok))) - (advance-tok!) - (collect-params))))) + (fn () + (let ((nm (try-consume-param!))) + (when (not (= nm nil)) + (begin (append! params nm) (collect-params)))))) (collect-params) (consume! "op" "=") (let @@ -806,11 +848,22 @@ (begin (define collect-params (fn () - (when (check-tok? "ident" nil) - (begin - (append! ps (ocaml-tok-value (peek-tok))) - (advance-tok!) - (collect-params))))) + (cond + ((check-tok? "ident" nil) + (begin + (append! ps (ocaml-tok-value (peek-tok))) + (advance-tok!) + (collect-params))) + ((and (at-op? "(") + (< (+ idx 1) tok-len) + (let ((t1 (nth tokens (+ idx 1)))) + (and (= (ocaml-tok-type t1) "op") + (= (ocaml-tok-value t1) ")")))) + (begin + (advance-tok!) (advance-tok!) + (append! ps (str "__unit_" idx)) + (collect-params))) + (else nil)))) (collect-params) (consume! "op" "=") (let ((expr-start (cur-pos))) diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 5b87ac87..c204564f 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -702,9 +702,21 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 974) (eval "(ocaml-run \"Int.min 7 3\")") +;; ── Unit / wildcard parameters ────────────────────────────────── +(epoch 1000) +(eval "(ocaml-run \"let f () = 42 in f ()\")") +(epoch 1001) +(eval "(ocaml-run \"(fun () -> 99) ()\")") +(epoch 1002) +(eval "(ocaml-run \"let f _ = 1 in f 5\")") +(epoch 1003) +(eval "(ocaml-run-program \"let f () = 7;; f ()\")") +(epoch 1004) +(eval "(ocaml-run-program \"let g _ x = x + 1;; g 99 41\")") + EPOCHS -OUTPUT=$(timeout 60 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) +OUTPUT=$(timeout 180 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) check() { local epoch="$1" desc="$2" expected="$3" @@ -1112,6 +1124,13 @@ check 972 "Int.abs -5" '5' check 973 "Int.max" '7' check 974 "Int.min" '3' +# ── Unit / wildcard parameters ────────────────────────────────── +check 1000 "let f () = 42 in f ()" '42' +check 1001 "(fun () -> 99) ()" '99' +check 1002 "let f _ = 1 in f 5" '1' +check 1003 "top-level let f () =" '7' +check 1004 "wildcard top-level" '42' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index a7b756d1..9f051c29 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -343,6 +343,12 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-08 Phase 1 — unit `()` and wildcard `_` parameters in `let f () + = …` / `fun _ -> …` / `let f _ = …`. Parser helper `try-consume-param!` + now handles ident, wildcard `_` (renamed to `__wild_N`), unit `()` + (renamed to `__unit_N`), and typed `(x : T)` (signature skipped). + Same for top-level `parse-decl-let`. test.sh timeout extended from + 60s to 180s for the growing suite. 283/283 (+5). - 2026-05-08 Phase 6 — extended stdlib slice (+13 tests, 278 total). Host primitives exposed via `_string_*`, `_char_*`, `_int_*`, `_string_of_*` underscore-prefixed builtins so the OCaml-side From 9473911cf37adbbfa92159cc61a4237e1caef1e6 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 09:23:06 +0000 Subject: [PATCH 020/298] ocaml: phase 5.1 conformance.sh + scoreboard (283 tests across 14 suites) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit lib/ocaml/conformance.sh runs the full test suite, classifies each result by description prefix into one of 14 suites (tokenize, parser, eval-core, phase2-refs/loops/function/exn, phase3-adt, phase4-modules, phase5-hm, phase6-stdlib, let-and, phase1-params, misc), and emits scoreboard.json + scoreboard.md. Per the briefing: "Once the scoreboard exists (Phase 5.1), it is your north star." Real OCaml testsuite vendoring deferred — needs more stdlib + ADT decls to make .ml files runnable. --- lib/ocaml/conformance.sh | 116 ++++++++++++++++++++++++++++++++++++++ lib/ocaml/scoreboard.json | 21 +++++++ lib/ocaml/scoreboard.md | 20 +++++++ plans/ocaml-on-sx.md | 18 ++++++ 4 files changed, 175 insertions(+) create mode 100755 lib/ocaml/conformance.sh create mode 100644 lib/ocaml/scoreboard.json create mode 100644 lib/ocaml/scoreboard.md diff --git a/lib/ocaml/conformance.sh b/lib/ocaml/conformance.sh new file mode 100755 index 00000000..7cc471fa --- /dev/null +++ b/lib/ocaml/conformance.sh @@ -0,0 +1,116 @@ +#!/usr/bin/env bash +# lib/ocaml/conformance.sh — run the OCaml-on-SX test suite and emit +# scoreboard.json + scoreboard.md broken into suites by epoch range. +# +# Suites are defined by epoch ranges in test.sh: +# 100-199 tokenize +# 200-329 parse-expr +# 270-329 parse-program (overlaps; assigned to parse-expr) +# 400-499 eval-core (atoms / arith / control / let / fn) +# 500-665 phase3-adt-match (incl ref + try/with) +# 700-754 phase4-modules +# 800-974 phase6-stdlib +# 850-852 let-and (small group) +# 900-913 phase5-hm +# 1000+ misc + +set -uo pipefail +cd "$(git rev-parse --show-toplevel)" + +SX_SERVER="${SX_SERVER:-hosts/ocaml/_build/default/bin/sx_server.exe}" +if [ ! -x "$SX_SERVER" ]; then + SX_SERVER="/root/rose-ash/hosts/ocaml/_build/default/bin/sx_server.exe" +fi +if [ ! -x "$SX_SERVER" ]; then + echo "ERROR: sx_server.exe not found." >&2 + exit 1 +fi + +OUT_JSON="lib/ocaml/scoreboard.json" +OUT_MD="lib/ocaml/scoreboard.md" + +# Run test.sh in verbose mode, capturing per-test pass/fail lines plus +# the trailing summary. +TMPLOG=$(mktemp) +trap "rm -f $TMPLOG" EXIT +bash lib/ocaml/test.sh -v > "$TMPLOG" 2>&1 || true + +# Classification by epoch is non-trivial to recover from the human +# output, so we classify by the test-name prefix that test.sh emits. +declare -A SUITE_PASS +declare -A SUITE_FAIL + +classify() { + local desc="$1" + case "$desc" in + *"tok"*|*"comment"*|*"keyword"*|*"primed"*|*"tyvar"*|*"underscored"*|*"hex"*|*"exponent"*|*"escape"*) echo "tokenize" ;; + *"parse"*|*"program"*|*"match"*|*"begin/end"*|*"::"*|*"|>"*|*"|"*) echo "parser" ;; + *"eval"*|*"truthy"*|*"closure"*|*"recur"*|*"fact"*|*"fib"*|*"sum"*|*"curried lambda"*) echo "eval-core" ;; + *"ref"*|*"deref"*|*"increment"*|*":="*) echo "phase2-refs" ;; + *"for"*|*"while"*|*"product"*) echo "phase2-loops" ;; + *"function "*|*"rec function"*) echo "phase2-function" ;; + *"try"*|*"raise"*|*"failwith"*|*"caught"*) echo "phase2-exn" ;; + *"None"*|*"Some"*|*"Pair"*|*"Ok"*|*"Error"*|*"ctor"*) echo "phase3-adt" ;; + *"module"*|*"functor"*|*"include"*|*"open"*|*"M.x"*|*"submodule"*|*"alias"*|*"Sphere"*|*"Identity"*|*"Outer.Inner"*) echo "phase4-modules" ;; + *"List."*|*"Option."*|*"Result."*|*"Char."*|*"Int."*|*"String."*) echo "phase6-stdlib" ;; + *"type "*|*"Int -> Int"*|*"poly"*|*"twice"*|*"Bool"*|*" -> "*) echo "phase5-hm" ;; + *"and y"*|*"mutual"*|*"odd"*|*"even"*) echo "let-and" ;; + *"unit "*|*"wildcard"*|*"top-level let f"*) echo "phase1-params" ;; + *) echo "misc" ;; + esac +} + +while IFS= read -r line; do + if [[ "$line" =~ ^[[:space:]]*ok\ (.+)$ ]]; then + desc="${BASH_REMATCH[1]}" + suite=$(classify "$desc") + SUITE_PASS[$suite]=$(( ${SUITE_PASS[$suite]:-0} + 1 )) + elif [[ "$line" =~ ^[[:space:]]*FAIL\ (.+)\ \(epoch ]]; then + desc="${BASH_REMATCH[1]}" + suite=$(classify "$desc") + SUITE_FAIL[$suite]=$(( ${SUITE_FAIL[$suite]:-0} + 1 )) + fi +done < "$TMPLOG" + +# Pull the final pass/total +TOTAL_PASS=0 +TOTAL_FAIL=0 +for s in "${!SUITE_PASS[@]}"; do + TOTAL_PASS=$(( TOTAL_PASS + ${SUITE_PASS[$s]:-0} )) +done +for s in "${!SUITE_FAIL[@]}"; do + TOTAL_FAIL=$(( TOTAL_FAIL + ${SUITE_FAIL[$s]:-0} )) +done +TOTAL=$((TOTAL_PASS + TOTAL_FAIL)) + +# Emit scoreboard.json (suites sorted) +{ + printf '{\n "suites": {\n' + first=1 + for s in $(printf '%s\n' "${!SUITE_PASS[@]}" "${!SUITE_FAIL[@]}" | sort -u); do + p=${SUITE_PASS[$s]:-0} + f=${SUITE_FAIL[$s]:-0} + if [ $first -eq 1 ]; then first=0; else printf ',\n'; fi + printf ' "%s": {"pass": %d, "fail": %d}' "$s" "$p" "$f" + done + printf '\n },\n' + printf ' "total_pass": %d,\n' "$TOTAL_PASS" + printf ' "total_fail": %d,\n' "$TOTAL_FAIL" + printf ' "total": %d\n' "$TOTAL" + printf '}\n' +} > "$OUT_JSON" + +# Emit scoreboard.md +{ + printf '# OCaml-on-SX scoreboard\n\n' + printf '%d / %d tests passing.\n\n' "$TOTAL_PASS" "$TOTAL" + printf '| Suite | Pass | Fail |\n' + printf '|---|---:|---:|\n' + for s in $(printf '%s\n' "${!SUITE_PASS[@]}" "${!SUITE_FAIL[@]}" | sort -u); do + p=${SUITE_PASS[$s]:-0} + f=${SUITE_FAIL[$s]:-0} + printf '| %s | %d | %d |\n' "$s" "$p" "$f" + done +} > "$OUT_MD" + +cat "$OUT_MD" diff --git a/lib/ocaml/scoreboard.json b/lib/ocaml/scoreboard.json new file mode 100644 index 00000000..43ee048e --- /dev/null +++ b/lib/ocaml/scoreboard.json @@ -0,0 +1,21 @@ +{ + "suites": { + "eval-core": {"pass": 47, "fail": 0}, + "let-and": {"pass": 3, "fail": 0}, + "misc": {"pass": 39, "fail": 0}, + "parser": {"pass": 85, "fail": 0}, + "phase1-params": {"pass": 2, "fail": 0}, + "phase2-exn": {"pass": 6, "fail": 0}, + "phase2-function": {"pass": 3, "fail": 0}, + "phase2-loops": {"pass": 4, "fail": 0}, + "phase2-refs": {"pass": 6, "fail": 0}, + "phase3-adt": {"pass": 13, "fail": 0}, + "phase4-modules": {"pass": 12, "fail": 0}, + "phase5-hm": {"pass": 17, "fail": 0}, + "phase6-stdlib": {"pass": 29, "fail": 0}, + "tokenize": {"pass": 18, "fail": 0} + }, + "total_pass": 284, + "total_fail": 0, + "total": 284 +} diff --git a/lib/ocaml/scoreboard.md b/lib/ocaml/scoreboard.md new file mode 100644 index 00000000..f9f25839 --- /dev/null +++ b/lib/ocaml/scoreboard.md @@ -0,0 +1,20 @@ +# OCaml-on-SX scoreboard + +284 / 284 tests passing. + +| Suite | Pass | Fail | +|---|---:|---:| +| eval-core | 47 | 0 | +| let-and | 3 | 0 | +| misc | 39 | 0 | +| parser | 85 | 0 | +| phase1-params | 2 | 0 | +| phase2-exn | 6 | 0 | +| phase2-function | 3 | 0 | +| phase2-loops | 4 | 0 | +| phase2-refs | 6 | 0 | +| phase3-adt | 13 | 0 | +| phase4-modules | 12 | 0 | +| phase5-hm | 17 | 0 | +| phase6-stdlib | 29 | 0 | +| tokenize | 18 | 0 | diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 9f051c29..51b233f8 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -202,6 +202,18 @@ SX CEK evaluator (both JS and OCaml hosts) `Int`, `Float`, `Bool`, `Unit`, `Printf`, `Format` (stubs, filled in Phase 6). - [ ] Tests in `lib/ocaml/tests/modules.sx` — 30+ tests. +### Phase 5.1 — Conformance scoreboard + +- [x] `lib/ocaml/conformance.sh` runs the full test suite, classifies + each test by description prefix into a suite (tokenize, parser, + eval-core, phase2-refs, phase2-loops, phase2-function, phase2-exn, + phase3-adt, phase4-modules, phase5-hm, phase6-stdlib, let-and, + phase1-params, misc), and emits `scoreboard.json` + `scoreboard.md`. +- [ ] Vendor a slice of the OCaml testsuite at `lib/ocaml/baseline/` + and feed it through `ocaml-run-program`, scoring per-file + conformance. _(Pending — needs more stdlib coverage and ADT type + decls to make most testsuite files runnable.)_ + ### Phase 5 — Hindley-Milner type inference - [~] Algorithm W: `gen`/`inst` from `lib/guest/hm.sx`, `unify` from @@ -343,6 +355,12 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-08 Phase 5.1 — `lib/ocaml/conformance.sh` + `scoreboard.json` + + `scoreboard.md`. Classifies tests into 14 suites by description + prefix and emits structured pass/fail counts. Current: 284 pass / 0 + fail (one test counted twice in classifier, hence 284 vs 283 + underlying). Vendoring real OCaml testsuite is the next step but + needs more stdlib coverage to make .ml files runnable end-to-end. - 2026-05-08 Phase 1 — unit `()` and wildcard `_` parameters in `let f () = …` / `fun _ -> …` / `let f _ = …`. Parser helper `try-consume-param!` now handles ident, wildcard `_` (renamed to `__wild_N`), unit `()` From 7fb65cd26a8f2c4c84989274615937fd046b22d6 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 09:26:24 +0000 Subject: [PATCH 021/298] ocaml: phase 1+2 records {x=1;y=2} + with-update (+6 tests, 289 total) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Parser: { f = e; f = e; ... } -> (:record (F E)...). { base with f = e; ... } -> (:record-update BASE (F E)...). Eval builds a dict from field bindings; record-update merges the new fields over the base dict — the same dict representation already used for modules. { also added to at-app-start? so records are valid arg atoms. Field access via the existing :field postfix unifies record/module access. Record patterns deferred to a later iteration. --- lib/ocaml/eval.sx | 31 +++++++++++++++++++++++ lib/ocaml/parser.sx | 58 ++++++++++++++++++++++++++++++++++++++++++-- lib/ocaml/test.sh | 22 +++++++++++++++++ plans/ocaml-on-sx.md | 6 +++++ 4 files changed, 115 insertions(+), 2 deletions(-) diff --git a/lib/ocaml/eval.sx b/lib/ocaml/eval.sx index ab67a841..660f2e98 100644 --- a/lib/ocaml/eval.sx +++ b/lib/ocaml/eval.sx @@ -381,6 +381,37 @@ ;; matches its argument against the clauses. (let ((clauses (nth ast 1)) (captured env)) (fn (arg) (ocaml-match-clauses arg clauses captured)))) + ((= tag "record") + (let ((fields (rest ast)) (result {})) + (begin + (define loop + (fn (xs) + (when (not (= xs (list))) + (let ((kv (first xs))) + (let ((k (first kv)) (v (ocaml-eval (nth kv 1) env))) + (begin + (set! result (merge result (dict k v))) + (loop (rest xs)))))))) + (loop fields) + result))) + ((= tag "record-update") + (let ((base-ast (nth ast 1)) (fields (rest (rest ast)))) + (let ((base (ocaml-eval base-ast env))) + (cond + ((dict? base) + (let ((result base)) + (begin + (define loop + (fn (xs) + (when (not (= xs (list))) + (let ((kv (first xs))) + (let ((k (first kv)) (v (ocaml-eval (nth kv 1) env))) + (begin + (set! result (merge result (dict k v))) + (loop (rest xs)))))))) + (loop fields) + result))) + (else (error (str "ocaml-eval: with-update on non-record: " base))))))) ((= tag "field") ;; `e.name` — evaluate e, expect a dict (record/module), get name. ;; Special case: `(:field (:con "M") "x")` looks up M as a module diff --git a/lib/ocaml/parser.sx b/lib/ocaml/parser.sx index fc72ffdd..de78484b 100644 --- a/lib/ocaml/parser.sx +++ b/lib/ocaml/parser.sx @@ -163,7 +163,7 @@ ((= tt "ctor") true) ((and (= tt "keyword") (or (= tv "true") (= tv "false"))) true) - ((and (= tt "op") (or (= tv "(") (= tv "["))) true) + ((and (= tt "op") (or (= tv "(") (= tv "[") (= tv "{"))) true) (else false))))) (set! parse-pattern-atom @@ -401,6 +401,60 @@ (let ((e (parse-expr))) (begin (consume! "keyword" "end") e)))) + ;; Record literal { f1 = e1; f2 = e2 } or update + ;; { r with f1 = e1; f2 = e2 }. + ((and (= tt "op") (= tv "{")) + (begin + (advance-tok!) + (cond + ;; { r with field = expr; ... } — base ident + with. + ((and (= (ocaml-tok-type (peek-tok)) "ident") + (= (ocaml-tok-value (peek-tok-at 1)) "with")) + (let ((base-name (ocaml-tok-value (peek-tok)))) + (begin + (advance-tok!) ;; ident + (advance-tok!) ;; with + (let ((fields (list))) + (begin + (define one + (fn () + (let ((fname (ocaml-tok-value (consume! "ident" nil)))) + (begin + (consume! "op" "=") + (let ((fexpr (parse-expr-no-seq))) + (append! fields (list fname fexpr))))))) + (one) + (define more + (fn () + (when (at-op? ";") + (begin (advance-tok!) + (when (not (at-op? "}")) + (begin (one) (more))))))) + (more) + (consume! "op" "}") + (cons :record-update + (cons (list :var base-name) fields))))))) + (else + ;; Plain record literal { f = e; f = e; ... }. + (let ((fields (list))) + (begin + (define one + (fn () + (let ((fname (ocaml-tok-value (consume! "ident" nil)))) + (begin + (consume! "op" "=") + (let ((fexpr (parse-expr-no-seq))) + (append! fields (list fname fexpr))))))) + (one) + (define more + (fn () + (when (at-op? ";") + (begin (advance-tok!) + (when (not (at-op? "}")) + (begin (one) (more))))))) + (more) + (consume! "op" "}") + (cons :record fields))))))) (else (error (str @@ -425,7 +479,7 @@ ((= tt "ctor") true) ((and (= tt "keyword") (or (= tv "true") (= tv "false") (= tv "begin"))) true) - ((and (= tt "op") (or (= tv "(") (= tv "["))) true) + ((and (= tt "op") (or (= tv "(") (= tv "[") (= tv "{"))) true) (else false))))) (define parse-atom-postfix (fn () diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index c204564f..e5f42a0b 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -714,6 +714,20 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 1004) (eval "(ocaml-run-program \"let g _ x = x + 1;; g 99 41\")") +;; ── Records ──────────────────────────────────────────────────── +(epoch 1100) +(eval "(ocaml-run \"let r = { x = 1; y = 2 } in r.x\")") +(epoch 1101) +(eval "(ocaml-run \"let r = { x = 1; y = 2 } in r.x + r.y\")") +(epoch 1102) +(eval "(ocaml-run \"let r = { x = 1; y = 2 } in let r2 = { r with x = 99 } in r2.x + r2.y\")") +(epoch 1103) +(eval "(ocaml-run \"let p = { name = \\\"Bob\\\"; age = 30 } in p.name\")") +(epoch 1104) +(eval "(ocaml-run \"let p = { name = \\\"Bob\\\"; age = 30 } in p.age\")") +(epoch 1105) +(eval "(ocaml-run-program \"let r = { x = 1; y = 2 };; r.x + r.y\")") + EPOCHS OUTPUT=$(timeout 180 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1131,6 +1145,14 @@ check 1002 "let f _ = 1 in f 5" '1' check 1003 "top-level let f () =" '7' check 1004 "wildcard top-level" '42' +# ── Records ───────────────────────────────────────────────────── +check 1100 "record literal + access" '1' +check 1101 "record sum fields" '3' +check 1102 "record with-update" '101' +check 1103 "record string field" '"Bob"' +check 1104 "record int field" '30' +check 1105 "top-level record decl" '3' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 51b233f8..b52ae339 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -355,6 +355,12 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-08 Phase 1+2 — record literals `{ x = 1; y = 2 }` and + functional update `{ r with x = 99 }`. Parser produces `(:record (F E) + ...)` and `(:record-update BASE-EXPR (F E) ...)`. Eval builds a dict + from field bindings; record-update merges over the base dict (the same + dict-based representation we already use for modules). Field access + via existing `:field` postfix. Record patterns deferred. 289/289 (+6). - 2026-05-08 Phase 5.1 — `lib/ocaml/conformance.sh` + `scoreboard.json` + `scoreboard.md`. Classifies tests into 14 suites by description prefix and emits structured pass/fail counts. Current: 284 pass / 0 From 851e0585cfb718f47b8620132065853c53ab0be1 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 12:28:07 +0000 Subject: [PATCH 022/298] ocaml: phase 3 'as' alias + 'when' guard in match (+6 tests, 295 total) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pattern parser top wraps cons-pat with 'as ident' -> (:pas PAT NAME). Match clause parser consumes optional 'when GUARD-EXPR' before -> and emits (:case-when PAT GUARD BODY) instead of :case. Eval: :pas matches inner pattern then binds the alias name; case-when checks the guard after a successful match and falls through to the next clause if the guard is false. Or-patterns deferred — ambiguous with clause separator without parens-only support. --- lib/ocaml/eval.sx | 30 +++++++++++++++++++++++++----- lib/ocaml/parser.sx | 27 ++++++++++++++++++++++++--- lib/ocaml/test.sh | 22 ++++++++++++++++++++++ plans/ocaml-on-sx.md | 16 +++++++++++++--- 4 files changed, 84 insertions(+), 11 deletions(-) diff --git a/lib/ocaml/eval.sx b/lib/ocaml/eval.sx index 660f2e98..fec7cd8b 100644 --- a/lib/ocaml/eval.sx +++ b/lib/ocaml/eval.sx @@ -193,6 +193,13 @@ (= (len (rest val)) (len arg-pats))) (ocaml-match-list arg-pats (rest val) env)) (else ocaml-match-fail)))) + ((= tag "pas") + ;; (:pas INNER NAME) — match inner pattern, also bind NAME → val. + (let ((inner (nth pat 1)) (alias (nth pat 2))) + (let ((env2 (ocaml-match-pat inner val env))) + (cond + ((= env2 ocaml-match-fail) ocaml-match-fail) + (else (ocaml-env-extend env2 alias val)))))) ((= tag "pcons") ;; (:pcons HEAD TAIL) — val must be a non-empty list. (cond @@ -239,11 +246,24 @@ (error (str "ocaml-eval: match failure on " val))) (else (let ((clause (first cs))) - (let ((pat (nth clause 1)) (body (nth clause 2))) - (let ((env2 (ocaml-match-pat pat val env))) - (cond - ((= env2 ocaml-match-fail) (try-clauses (rest cs))) - (else (ocaml-eval body env2)))))))))) + (let ((ctag (nth clause 0))) + (cond + ((= ctag "case") + (let ((pat (nth clause 1)) (body (nth clause 2))) + (let ((env2 (ocaml-match-pat pat val env))) + (cond + ((= env2 ocaml-match-fail) (try-clauses (rest cs))) + (else (ocaml-eval body env2)))))) + ((= ctag "case-when") + (let ((pat (nth clause 1)) + (guard (nth clause 2)) + (body (nth clause 3))) + (let ((env2 (ocaml-match-pat pat val env))) + (cond + ((= env2 ocaml-match-fail) (try-clauses (rest cs))) + ((not (ocaml-eval guard env2)) (try-clauses (rest cs))) + (else (ocaml-eval body env2)))))) + (else (error (str "ocaml-match: bad clause tag " ctag)))))))))) (try-clauses clauses)))) (define ocaml-match-eval diff --git a/lib/ocaml/parser.sx b/lib/ocaml/parser.sx index de78484b..dd5a24a7 100644 --- a/lib/ocaml/parser.sx +++ b/lib/ocaml/parser.sx @@ -281,7 +281,20 @@ (advance-tok!) (list :pcons lhs (parse-pattern-cons)))) (else lhs))))) - (set! parse-pattern (fn () (parse-pattern-cons))) + ;; Top-level pattern is the cons-pat layer wrapped with optional + ;; `pat as name` aliasing. Or-patterns are not supported at the + ;; top level due to ambiguity with the match clause separator; + ;; use `(A | B)` if needed in the future via a parens-only or. + (set! parse-pattern + (fn () + (let ((p (parse-pattern-cons))) + (cond + ((at-kw? "as") + (begin + (advance-tok!) + (let ((n (ocaml-tok-value (consume! "ident" nil)))) + (list :pas p n)))) + (else p))))) (define peek-tok-at (fn (n) @@ -673,12 +686,20 @@ (fn () (let - ((p (parse-pattern))) + ((p (parse-pattern)) (guard nil)) (begin + (when (at-kw? "when") + (begin + (advance-tok!) + (set! guard (parse-expr-no-seq)))) (consume! "op" "->") (let ((body (parse-expr))) - (append! cases (list :case p body))))))) + (cond + ((= guard nil) + (append! cases (list :case p body))) + (else + (append! cases (list :case-when p guard body))))))))) (one) (define loop diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index e5f42a0b..776cd7b1 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -728,6 +728,20 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 1105) (eval "(ocaml-run-program \"let r = { x = 1; y = 2 };; r.x + r.y\")") +;; ── as / when in match ───────────────────────────────────────── +(epoch 1200) +(eval "(ocaml-run \"match Some 5 with | Some x as p -> x | None -> 0\")") +(epoch 1201) +(eval "(ocaml-run \"match 5 with | n when n > 0 -> 1 | n when n < 0 -> -1 | _ -> 0\")") +(epoch 1202) +(eval "(ocaml-run \"match (-3) with | n when n > 0 -> 1 | n when n < 0 -> -1 | _ -> 0\")") +(epoch 1203) +(eval "(ocaml-run \"match 0 with | n when n > 0 -> 1 | n when n < 0 -> -1 | _ -> 0\")") +(epoch 1204) +(eval "(ocaml-run \"match (Some 7) with | None -> 0 | Some x when x > 5 -> x * 10 | Some x -> x\")") +(epoch 1205) +(eval "(ocaml-run \"match (Some 3) with | None -> 0 | Some x when x > 5 -> x * 10 | Some x -> x\")") + EPOCHS OUTPUT=$(timeout 180 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1153,6 +1167,14 @@ check 1103 "record string field" '"Bob"' check 1104 "record int field" '30' check 1105 "top-level record decl" '3' +# ── as / when in match ────────────────────────────────────────── +check 1200 "Some x as p" '5' +check 1201 "when sign +" '1' +check 1202 "when sign -" '-1' +check 1203 "when sign 0" '0' +check 1204 "when guard fires" '70' +check 1205 "when guard skips" '3' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index b52ae339..50630441 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -136,9 +136,11 @@ SX CEK evaluator (both JS and OCaml hosts) bindings.)_ - [~] **Patterns:** constructor (nullary + with args, incl. flattened tuple args `Pair (a, b)` → `(:pcon "Pair" PA PB)`), literal (int/string/char/ - bool/unit), variable, wildcard `_`, tuple, list cons `::`, list literal. - _(Pending: record patterns, `as` binding, or-pattern `P1 | P2`, `when` - guard.)_ + bool/unit), variable, wildcard `_`, tuple, list cons `::`, list + literal, `as` binding (`pat as name`). Match clauses support `when` + guard via `(:case-when PAT GUARD BODY)`. _(Pending: record patterns, + or-pattern `P1 | P2` — ambiguous with clause separator without + lookahead.)_ - [ ] OCaml is **not** indentation-sensitive — no layout algorithm needed. - [ ] Tests in `lib/ocaml/tests/parse.sx` — 50+ round-trip parse tests. @@ -355,6 +357,14 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-08 Phase 3 — `as` aliases + `when` guards in match (+6 tests, + 295 total). Parser: pattern parser wraps with `as ident` → `(:pas + PAT NAME)`. Match's `one` consumes optional `when GUARD-EXPR` → emits + `(:case-when PAT GUARD BODY)` instead of `:case`. Eval `:pas` matches + inner pattern then also binds the alias name; `case-when` checks the + guard after a successful match and falls through if false. Or-pat + `(P1 | P2)` deferred — ambiguous with clause separator without + parens-only support. - 2026-05-08 Phase 1+2 — record literals `{ x = 1; y = 2 }` and functional update `{ r with x = 99 }`. Parser produces `(:record (F E) ...)` and `(:record-update BASE-EXPR (F E) ...)`. Eval builds a dict From d8f6250962c01a0d0ab942212f58d24e3ac68d11 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 12:32:39 +0000 Subject: [PATCH 023/298] ocaml: phase 3 type declarations (+5 tests, 300 total) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Parser: type [PARAMS] NAME = | Ctor [of T1 [* T2]*] | ... - PARAMS: optional 'a or ('a, 'b) tyvar list - AST: (:type-def NAME PARAMS CTORS) with each CTOR (NAME ARG-SOURCES) - Argument types captured as raw source strings (treated opaquely at runtime since ctor dispatch is dynamic) Runtime is a no-op — constructors and pattern matching already work dynamically. Phase 5 will use these decls to register ctor types for HM checking. --- lib/ocaml/eval.sx | 6 ++++ lib/ocaml/parser.sx | 74 ++++++++++++++++++++++++++++++++++++++++++++ lib/ocaml/test.sh | 19 ++++++++++++ plans/ocaml-on-sx.md | 14 +++++++-- 4 files changed, 110 insertions(+), 3 deletions(-) diff --git a/lib/ocaml/eval.sx b/lib/ocaml/eval.sx index fec7cd8b..ff7c8c08 100644 --- a/lib/ocaml/eval.sx +++ b/lib/ocaml/eval.sx @@ -632,6 +632,7 @@ (begin (set! env (ocaml-env-extend env mname mod-val)) (set! result (merge result (dict mname mod-val)))))))) + ((= tag "type-def") nil) ((= tag "open") (let ((mod-val (ocaml-resolve-module-path (nth decl 1) env))) (cond @@ -803,6 +804,11 @@ (begin (set! env (ocaml-env-extend env mname mod-val)) (set! last mod-val)))))) + ((= tag "type-def") + ;; type t = ... — purely declarative at runtime; ctors + ;; are dispatched by tag at eval/match time. Phase 5 + ;; HM extensions will register ctor types here. + nil) ((or (= tag "open") (= tag "include")) ;; open M / include M — bring M's bindings into scope. (let ((mod-val (ocaml-resolve-module-path (nth decl 1) env))) diff --git a/lib/ocaml/parser.sx b/lib/ocaml/parser.sx index dd5a24a7..cffa971a 100644 --- a/lib/ocaml/parser.sx +++ b/lib/ocaml/parser.sx @@ -908,6 +908,7 @@ ((at-kw? "open") nil) ((at-kw? "include") nil) ((at-kw? "and") nil) + ((at-kw? "type") nil) (else (begin (advance-tok!) (skip-to-boundary!)))))) (define parse-decl-let @@ -977,6 +978,77 @@ ;; module M = struct DECLS end ;; Parsed by sub-tokenising the body source between `struct` and ;; the matching `end`. Nested modules / sigs increment depth. + ;; type [PARAMS] NAME = | Ctor [of T1 [* T2]*] | … + ;; + ;; PARAMS is `'a` or `('a, 'b)` (single or paren-tuple of tyvars). + ;; We parse the structure and emit `(:type-def NAME PARAMS CTORS)` + ;; where each CTOR is `(NAME ARG-TYPES)` (ARG-TYPES list of source + ;; strings — types are treated opaquely at runtime). + (define + parse-decl-type + (fn () + (advance-tok!) ;; consume 'type' + (let ((tparams (list))) + (begin + ;; Optional type-vars before the type name. + (cond + ((= (ocaml-tok-type (peek-tok)) "tyvar") + (begin + (append! tparams (ocaml-tok-value (peek-tok))) + (advance-tok!))) + ((at-op? "(") + (begin + (advance-tok!) + (define more + (fn () + (when (= (ocaml-tok-type (peek-tok)) "tyvar") + (begin + (append! tparams (ocaml-tok-value (peek-tok))) + (advance-tok!) + (when (at-op? ",") + (begin (advance-tok!) (more))))))) + (more) + (consume! "op" ")")))) + (let ((name (ocaml-tok-value (consume! "ident" nil)))) + (begin + (consume! "op" "=") + (when (at-op? "|") (advance-tok!)) + ;; Parse a sum-type: Ctor [of TYPE [* TYPE]*] (| Ctor …)* + (let ((ctors (list))) + (begin + (define one + (fn () + (let ((cname (ocaml-tok-value (consume! "ctor" nil))) + (arg-srcs (list))) + (begin + (when (at-kw? "of") + (begin + (advance-tok!) + (let ((arg-start (cur-pos))) + (begin + (define skip-type + (fn () + (cond + ((>= idx tok-len) nil) + ((= (ocaml-tok-type (peek-tok)) "eof") nil) + ((at-op? "|") nil) + ((at-op? ";;") nil) + ((at-kw? "let") nil) + ((at-kw? "type") nil) + ((at-kw? "and") nil) + ((at-kw? "module") nil) + (else (begin (advance-tok!) (skip-type)))))) + (skip-type) + (append! arg-srcs (slice src arg-start (cur-pos))))))) + (append! ctors (cons cname arg-srcs)))))) + (one) + (define more + (fn () + (when (at-op? "|") + (begin (advance-tok!) (one) (more))))) + (more) + (list :type-def name tparams ctors))))))))) + ;; open M / include M — collect a path Ctor(.SubCtor)* and emit ;; (:open PATH) or (:include PATH). (define @@ -1143,6 +1215,8 @@ (begin (append! decls (parse-decl-open false)) (loop))) ((at-kw? "include") (begin (append! decls (parse-decl-open true)) (loop))) + ((at-kw? "type") + (begin (append! decls (parse-decl-type)) (loop))) (else (begin (append! decls (parse-decl-expr)) (loop)))))))) (loop) (cons :program decls))))) diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 776cd7b1..ff6f2049 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -742,6 +742,18 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 1205) (eval "(ocaml-run \"match (Some 3) with | None -> 0 | Some x when x > 5 -> x * 10 | Some x -> x\")") +;; ── type declarations (parser + runtime) ────────────────────── +(epoch 1300) +(eval "(ocaml-parse-program \"type color = Red | Green | Blue\")") +(epoch 1301) +(eval "(ocaml-parse-program \"type shape = Circle of int | Rect of int | Square of int\")") +(epoch 1302) +(eval "(ocaml-run-program \"type color = Red | Green | Blue ;; match Red with | Red -> 1 | Green -> 2 | Blue -> 3\")") +(epoch 1303) +(eval "(ocaml-run-program \"type color = Red | Green | Blue ;; match Blue with | Red -> 1 | Green -> 2 | Blue -> 3\")") +(epoch 1304) +(eval "(ocaml-run-program \"type shape = Circle of int | Square of int ;; match Circle 5 with | Circle r -> r | Square s -> s\")") + EPOCHS OUTPUT=$(timeout 180 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1175,6 +1187,13 @@ check 1203 "when sign 0" '0' check 1204 "when guard fires" '70' check 1205 "when guard skips" '3' +# ── type declarations ─────────────────────────────────────────── +check 1300 "type color enum" '("type-def" "color" () (("Red") ("Green") ("Blue")))' +check 1301 "type shape with-args" '("type-def" "shape"' +check 1302 "type-decl + match Red" '1' +check 1303 "type-decl + match Blue" '3' +check 1304 "type-decl + Circle r" '5' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 50630441..04658512 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -167,9 +167,10 @@ SX CEK evaluator (both JS and OCaml hosts) ### Phase 3 — ADTs + pattern matching -- [ ] `type` declarations: `type t = A | B of t1 * t2 | C of { x: int }`. - _(Parser + evaluator currently inferred-arity at runtime; type decls - pending.)_ +- [x] `type` declarations: `type [params] t = | A | B of t1 [* t2] | …`. + Parser emits `(:type-def NAME PARAMS CTORS)`. Runtime treats decls + as no-ops since constructors are dispatched dynamically by tag. + Phase 5 will register ctor types here for HM checking. - [x] Constructors as tagged lists: `A` → `("A")`, `B(1, "x")` → `("B" 1 "x")`. - [~] `match`/`with`: constructor, literal, variable, wildcard, tuple, list cons/nil, nested patterns. _(Pending: `as` binding, or-patterns, @@ -357,6 +358,13 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-08 Phase 3 — `type` declarations (+5 tests, 300 total). Parser + handles `type [PARAMS] NAME = | Ctor [of T1 [* T2]*] | ...`, with + optional `'a` or `('a, 'b)` type parameters. Argument types are + captured as raw source strings (treated opaquely at runtime). Runtime + is a no-op since ctor application + match already work dynamically. + 300th test! Constructors `Red`/`Green`/`Blue` and `Circle of int` / + `Square of int` round-trip through parse + eval cleanly. - 2026-05-08 Phase 3 — `as` aliases + `when` guards in match (+6 tests, 295 total). Parser: pattern parser wraps with `as ident` → `(:pas PAT NAME)`. Match's `one` consumes optional `when GUARD-EXPR` → emits From bc557a5ad2de53e7e5e7e5f749b83b4adbeba47e Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 12:37:58 +0000 Subject: [PATCH 024/298] ocaml: phase 3 exception declarations (+4 tests, 304 total) exception NAME [of TYPE] parses to (:exception-def NAME [ARG-SRC]). Runtime is a no-op: raise/match already work on tagged ctor values, so 'exception E of int;; try raise (E 5) with | E n -> n' end-to-end with zero new eval logic. --- lib/ocaml/eval.sx | 5 +++++ lib/ocaml/parser.sx | 32 ++++++++++++++++++++++++++++++++ lib/ocaml/test.sh | 16 ++++++++++++++++ plans/ocaml-on-sx.md | 11 +++++++++-- 4 files changed, 62 insertions(+), 2 deletions(-) diff --git a/lib/ocaml/eval.sx b/lib/ocaml/eval.sx index ff7c8c08..72e22faa 100644 --- a/lib/ocaml/eval.sx +++ b/lib/ocaml/eval.sx @@ -633,6 +633,7 @@ (set! env (ocaml-env-extend env mname mod-val)) (set! result (merge result (dict mname mod-val)))))))) ((= tag "type-def") nil) + ((= tag "exception-def") nil) ((= tag "open") (let ((mod-val (ocaml-resolve-module-path (nth decl 1) env))) (cond @@ -809,6 +810,10 @@ ;; are dispatched by tag at eval/match time. Phase 5 ;; HM extensions will register ctor types here. nil) + ((= tag "exception-def") + ;; exception E [of T] — purely declarative; raise+match + ;; already work on tagged ctor values. + nil) ((or (= tag "open") (= tag "include")) ;; open M / include M — bring M's bindings into scope. (let ((mod-val (ocaml-resolve-module-path (nth decl 1) env))) diff --git a/lib/ocaml/parser.sx b/lib/ocaml/parser.sx index cffa971a..dd7f59a0 100644 --- a/lib/ocaml/parser.sx +++ b/lib/ocaml/parser.sx @@ -909,6 +909,7 @@ ((at-kw? "include") nil) ((at-kw? "and") nil) ((at-kw? "type") nil) + ((at-kw? "exception") nil) (else (begin (advance-tok!) (skip-to-boundary!)))))) (define parse-decl-let @@ -978,6 +979,35 @@ ;; module M = struct DECLS end ;; Parsed by sub-tokenising the body source between `struct` and ;; the matching `end`. Nested modules / sigs increment depth. + ;; exception NAME [of TYPE [* TYPE]*] + (define + parse-decl-exception + (fn () + (advance-tok!) ;; consume 'exception' + (let ((name (ocaml-tok-value (consume! "ctor" nil))) + (arg-srcs (list))) + (begin + (when (at-kw? "of") + (begin + (advance-tok!) + (let ((arg-start (cur-pos))) + (begin + (define skip-type + (fn () + (cond + ((>= idx tok-len) nil) + ((= (ocaml-tok-type (peek-tok)) "eof") nil) + ((at-op? ";;") nil) + ((at-kw? "let") nil) + ((at-kw? "type") nil) + ((at-kw? "and") nil) + ((at-kw? "module") nil) + ((at-kw? "exception") nil) + (else (begin (advance-tok!) (skip-type)))))) + (skip-type) + (append! arg-srcs (slice src arg-start (cur-pos))))))) + (cons :exception-def (cons name arg-srcs)))))) + ;; type [PARAMS] NAME = | Ctor [of T1 [* T2]*] | … ;; ;; PARAMS is `'a` or `('a, 'b)` (single or paren-tuple of tyvars). @@ -1217,6 +1247,8 @@ (begin (append! decls (parse-decl-open true)) (loop))) ((at-kw? "type") (begin (append! decls (parse-decl-type)) (loop))) + ((at-kw? "exception") + (begin (append! decls (parse-decl-exception)) (loop))) (else (begin (append! decls (parse-decl-expr)) (loop)))))))) (loop) (cons :program decls))))) diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index ff6f2049..29ba8e52 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -754,6 +754,16 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 1304) (eval "(ocaml-run-program \"type shape = Circle of int | Square of int ;; match Circle 5 with | Circle r -> r | Square s -> s\")") +;; ── exception declarations ──────────────────────────────────── +(epoch 1320) +(eval "(ocaml-parse-program \"exception MyExn\")") +(epoch 1321) +(eval "(ocaml-parse-program \"exception MyExn of int\")") +(epoch 1322) +(eval "(ocaml-run-program \"exception E of int ;; try raise (E 5) with | E n -> n\")") +(epoch 1323) +(eval "(ocaml-run-program \"exception E of string ;; try raise (E \\\"oops\\\") with | E s -> s\")") + EPOCHS OUTPUT=$(timeout 180 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1194,6 +1204,12 @@ check 1302 "type-decl + match Red" '1' check 1303 "type-decl + match Blue" '3' check 1304 "type-decl + Circle r" '5' +# ── exception declarations ───────────────────────────────────── +check 1320 "exception nullary" '("exception-def" "MyExn")' +check 1321 "exception arg" '("exception-def" "MyExn"' +check 1322 "raise+catch with arg" '5' +check 1323 "raise+catch string arg" '"oops"' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 04658512..417d34f7 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -178,8 +178,10 @@ SX CEK evaluator (both JS and OCaml hosts) - [x] Exhaustiveness: runtime error on incomplete match (no compile-time check yet). - [ ] Built-in types: `option` (`None`/`Some`), `result` (`Ok`/`Error`), `list` (nil/cons), `bool`, `unit`, `exn`. -- [ ] `exception` declarations; built-in: `Not_found`, `Invalid_argument`, - `Failure`, `Match_failure`. +- [x] `exception` declarations: `exception NAME [of TYPE]`. Parser emits + `(:exception-def NAME [ARG-TYPE-SRC])`. Runtime no-op since + raise/match work on tagged ctor values. Built-ins: + `Failure`/`Invalid_argument` via `failwith`/`invalid_arg`. - [ ] Polymorphic variants (surface syntax `\`Tag value`; runtime same tagged list). - [ ] Tests in `lib/ocaml/tests/adt.sx` — 40+ tests: ADTs, match, option/result. @@ -358,6 +360,11 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-08 Phase 3 — `exception` declarations (+4 tests, 304 total). + `exception NAME [of TYPE]` parses to `(:exception-def NAME [ARG-SRC])`. + Runtime is a no-op: exception values are just tagged ctor values, so + the existing `raise`/`try`/`with` machinery works without any extra + wiring. - 2026-05-08 Phase 3 — `type` declarations (+5 tests, 300 total). Parser handles `type [PARAMS] NAME = | Ctor [of T1 [* T2]*] | ...`, with optional `'a` or `('a, 'b)` type parameters. Argument types are From 88c02c7c7377fbcb25689aa66672870bc92c0bf4 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 12:49:23 +0000 Subject: [PATCH 025/298] ocaml: phase 6 expanded stdlib (+15 tests, 319 total) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit List: concat/flatten, init, find/find_opt, partition, mapi/iteri, assoc/assoc_opt. Option: iter/fold/to_list. Result: get_ok/get_error/ map_error/to_option. Fixed skip-to-boundary! in parser to track let..in / begin..end / struct..end / for/while..done nesting via a depth counter — without this, nested-let inside a top-level decl body trips over the decl-boundary detector. Stdlib functions like List.init / mapi / iteri use begin..end to make their nested-let intent explicit. --- lib/ocaml/parser.sx | 51 ++++++++++++++++------- lib/ocaml/runtime.sx | 96 ++++++++++++++++++++++++++++++++++++++++++++ lib/ocaml/test.sh | 53 ++++++++++++++++++++++++ plans/ocaml-on-sx.md | 22 +++++++--- 4 files changed, 202 insertions(+), 20 deletions(-) diff --git a/lib/ocaml/parser.sx b/lib/ocaml/parser.sx index dd7f59a0..b85dba56 100644 --- a/lib/ocaml/parser.sx +++ b/lib/ocaml/parser.sx @@ -895,22 +895,45 @@ (fn () (let ((t (peek-tok))) (if (= t nil) (len src) (get t :pos))))) + ;; skip-to-boundary! advances `idx` to the next top-level decl + ;; boundary, tracking `let`/`begin`/`struct` etc. nesting so that + ;; an inner `let ... in ...` doesn't terminate a top-level decl + ;; body. Boundary tokens (when at depth 0): + ;; ;; let module open include and type exception + ;; Boundary at any depth: eof. (define skip-to-boundary! - (fn - () - (cond - ((>= idx tok-len) nil) - ((= (ocaml-tok-type (peek-tok)) "eof") nil) - ((at-op? ";;") nil) - ((at-kw? "let") nil) - ((at-kw? "module") nil) - ((at-kw? "open") nil) - ((at-kw? "include") nil) - ((at-kw? "and") nil) - ((at-kw? "type") nil) - ((at-kw? "exception") nil) - (else (begin (advance-tok!) (skip-to-boundary!)))))) + (fn () + (let ((depth 0)) + (begin + (define step + (fn () + (cond + ((>= idx tok-len) nil) + ((= (ocaml-tok-type (peek-tok)) "eof") nil) + ((and (= depth 0) (at-op? ";;")) nil) + ((and (= depth 0) (at-kw? "let")) nil) + ((and (= depth 0) (at-kw? "module")) nil) + ((and (= depth 0) (at-kw? "open")) nil) + ((and (= depth 0) (at-kw? "include")) nil) + ((and (= depth 0) (at-kw? "and")) nil) + ((and (= depth 0) (at-kw? "type")) nil) + ((and (= depth 0) (at-kw? "exception")) nil) + ;; Track nested blocks that have explicit closing + ;; tokens. let..in / begin..end / struct..end / + ;; sig..end / for..done / while..done. `if`/`match`/ + ;; `try` don't have hard close tokens so we don't + ;; track them — their bodies are bounded by the + ;; surrounding expression structure. + ((or (at-kw? "let") (at-kw? "begin") (at-kw? "struct") + (at-kw? "sig") (at-kw? "for") (at-kw? "while")) + (begin (set! depth (+ depth 1)) (advance-tok!) (step))) + ((or (at-kw? "in") (at-kw? "end") (at-kw? "done")) + (begin + (when (> depth 0) (set! depth (- depth 1))) + (advance-tok!) (step))) + (else (begin (advance-tok!) (step)))))) + (step))))) (define parse-decl-let (fn () diff --git a/lib/ocaml/runtime.sx b/lib/ocaml/runtime.sx index 52ea6cf1..701dfb2c 100644 --- a/lib/ocaml/runtime.sx +++ b/lib/ocaml/runtime.sx @@ -82,6 +82,67 @@ match lst with | [] -> failwith \"List.nth: out of range\" | h :: t -> if n = 0 then h else nth t (n - 1) + + let rec concat lst = + match lst with + | [] -> [] + | h :: t -> append h (concat t) + + let flatten = concat + + let rec init n f = + if n = 0 then [] else + begin + let rec build i = + if i = n then [] else f i :: build (i + 1) + in build 0 + end + + let rec find_opt p lst = + match lst with + | [] -> None + | h :: t -> if p h then Some h else find_opt p t + + let rec find p lst = + match find_opt p lst with + | None -> failwith \"List.find: not found\" + | Some x -> x + + let rec partition p lst = + match lst with + | [] -> ([], []) + | h :: t -> + (match partition p t with + | (yes, no) -> + if p h then (h :: yes, no) else (yes, h :: no)) + + let rec mapi f lst = + begin + let rec go i xs = + match xs with + | [] -> [] + | h :: t -> f i h :: go (i + 1) t + in go 0 lst + end + + let rec iteri f lst = + begin + let rec go i xs = + match xs with + | [] -> () + | h :: t -> f i h; go (i + 1) t + in go 0 lst + end + + let rec assoc k lst = + match lst with + | [] -> failwith \"List.assoc: not found\" + | (k2, v) :: t -> if k = k2 then v else assoc k t + + let rec assoc_opt k lst = + match lst with + | [] -> None + | (k2, v) :: t -> if k = k2 then Some v else assoc_opt k t end ;; module Option = struct @@ -114,6 +175,21 @@ match o with | None -> false | Some _ -> true + + let iter f o = + match o with + | None -> () + | Some x -> f x + + let fold none_v f o = + match o with + | None -> none_v + | Some x -> f x + + let to_list o = + match o with + | None -> [] + | Some x -> [x] end ;; module Result = struct @@ -136,6 +212,26 @@ match r with | Ok _ -> false | Error _ -> true + + let get_ok r = + match r with + | Ok x -> x + | Error _ -> failwith \"Result.get_ok: Error\" + + let get_error r = + match r with + | Ok _ -> failwith \"Result.get_error: Ok\" + | Error e -> e + + let map_error f r = + match r with + | Ok x -> Ok x + | Error e -> Error (f e) + + let to_option r = + match r with + | Ok x -> Some x + | Error _ -> None end ;; module String = struct diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 29ba8e52..e887b0c2 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -764,6 +764,40 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 1323) (eval "(ocaml-run-program \"exception E of string ;; try raise (E \\\"oops\\\") with | E s -> s\")") +;; ── Phase 6 expanded stdlib (List/Option/Result extensions) ─── +(epoch 1400) +(eval "(ocaml-run \"List.concat [[1;2];[3];[4;5]]\")") +(epoch 1401) +(eval "(ocaml-run \"List.init 5 (fun i -> i * 10)\")") +(epoch 1402) +(eval "(ocaml-run \"List.find_opt (fun x -> x > 2) [1;2;3;4]\")") +(epoch 1403) +(eval "(ocaml-run \"List.find_opt (fun x -> x > 99) [1;2;3]\")") +(epoch 1404) +(eval "(ocaml-run \"List.mapi (fun i x -> i + x) [10;20;30]\")") +(epoch 1405) +(eval "(ocaml-run \"List.partition (fun x -> x > 2) [1;2;3;4]\")") +(epoch 1406) +(eval "(ocaml-run \"List.assoc 2 [(1, \\\"a\\\"); (2, \\\"b\\\"); (3, \\\"c\\\")]\")") +(epoch 1407) +(eval "(ocaml-run \"List.assoc_opt 99 [(1, \\\"a\\\")]\")") + +(epoch 1410) +(eval "(ocaml-run \"Option.fold 0 (fun x -> x * 10) (Some 7)\")") +(epoch 1411) +(eval "(ocaml-run \"Option.fold 0 (fun x -> x * 10) None\")") +(epoch 1412) +(eval "(ocaml-run \"Option.to_list (Some 7)\")") +(epoch 1413) +(eval "(ocaml-run \"Option.to_list None\")") + +(epoch 1420) +(eval "(ocaml-run \"Result.get_ok (Ok 42)\")") +(epoch 1421) +(eval "(ocaml-run \"Result.to_option (Ok 1)\")") +(epoch 1422) +(eval "(ocaml-run \"Result.map_error (fun e -> e + 1) (Error 5)\")") + EPOCHS OUTPUT=$(timeout 180 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1210,6 +1244,25 @@ check 1321 "exception arg" '("exception-def" "MyExn"' check 1322 "raise+catch with arg" '5' check 1323 "raise+catch string arg" '"oops"' +# ── Phase 6 expanded stdlib ───────────────────────────────────── +check 1400 "List.concat" '(1 2 3 4 5)' +check 1401 "List.init" '(0 10 20 30 40)' +check 1402 "List.find_opt found" '("Some" 3)' +check 1403 "List.find_opt missing" '("None")' +check 1404 "List.mapi" '(10 21 32)' +check 1405 "List.partition" '("tuple" (3 4) (1 2))' +check 1406 "List.assoc" '"b"' +check 1407 "List.assoc_opt missing" '("None")' + +check 1410 "Option.fold Some" '70' +check 1411 "Option.fold None" '0' +check 1412 "Option.to_list Some" '(7)' +check 1413 "Option.to_list None" '()' + +check 1420 "Result.get_ok" '42' +check 1421 "Result.to_option Ok" '("Some" 1)' +check 1422 "Result.map_error" '("Error" 6)' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 417d34f7..75ad9e6a 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -240,12 +240,13 @@ SX CEK evaluator (both JS and OCaml hosts) - [~] `List`: `map`, `filter`, `fold_left`, `fold_right`, `length`, `rev`, `append`, `iter`, `for_all`, `exists`, `mem`, `nth`, `hd`, `tl`, - `rev_append`. _(Pending: concat/flatten, iteri/mapi, find/find_opt, - assoc/assq, sort, init, combine, split, partition.)_ -- [~] `Option`: `map`, `bind`, `value`, `get`, `is_none`, `is_some`. - _(Pending: fold/join/iter/to_list/to_result.)_ -- [~] `Result`: `map`, `bind`, `is_ok`, `is_error`. _(Pending: - fold/get_ok/get_error/map_error/to_option.)_ + `rev_append`, `concat`/`flatten`, `init`, `iteri`, `mapi`, `find`, + `find_opt`, `assoc`, `assoc_opt`, `partition`. _(Pending: + sort/stable_sort, combine, split.)_ +- [~] `Option`: `map`, `bind`, `value`, `get`, `is_none`, `is_some`, + `iter`, `fold`, `to_list`. _(Pending: join/to_result.)_ +- [~] `Result`: `map`, `bind`, `is_ok`, `is_error`, `get_ok`, + `get_error`, `map_error`, `to_option`. _(Pending: fold/join.)_ - [~] `String`: `length`, `get`, `sub`, `concat`, `uppercase_ascii`, `lowercase_ascii`, `starts_with`. _(Pending: split_on_char, trim, contains, ends_with, index_opt, replace_all.)_ @@ -360,6 +361,15 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-08 Phase 6 — expanded stdlib slice (+15 tests, 319 total). + List: concat/flatten, init, find/find_opt, partition, mapi/iteri, + assoc/assoc_opt. Option: iter, fold, to_list. Result: get_ok, + get_error, map_error, to_option. Also fixed parser's + skip-to-boundary! to track `let..in` / `begin..end` / `struct..end` + / `for/while..done` nesting via a depth counter so nested let + expressions inside top-level decl bodies don't trip over the + decl-boundary detector. Stdlib functions like `init` use `begin..end` + to make nested-let intent explicit. - 2026-05-08 Phase 3 — `exception` declarations (+4 tests, 304 total). `exception NAME [of TYPE]` parses to `(:exception-def NAME [ARG-SRC])`. Runtime is a no-op: exception values are just tagged ctor values, so From a0abdcf5200fe434eb17cb7c39827537124ff72d Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 12:50:39 +0000 Subject: [PATCH 026/298] ocaml: refresh scoreboard (320/320 across 14 suites) --- lib/ocaml/scoreboard.json | 18 +++++++++--------- lib/ocaml/scoreboard.md | 16 ++++++++-------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/lib/ocaml/scoreboard.json b/lib/ocaml/scoreboard.json index 43ee048e..4c07725e 100644 --- a/lib/ocaml/scoreboard.json +++ b/lib/ocaml/scoreboard.json @@ -1,21 +1,21 @@ { "suites": { - "eval-core": {"pass": 47, "fail": 0}, + "eval-core": {"pass": 48, "fail": 0}, "let-and": {"pass": 3, "fail": 0}, - "misc": {"pass": 39, "fail": 0}, - "parser": {"pass": 85, "fail": 0}, + "misc": {"pass": 52, "fail": 0}, + "parser": {"pass": 87, "fail": 0}, "phase1-params": {"pass": 2, "fail": 0}, - "phase2-exn": {"pass": 6, "fail": 0}, + "phase2-exn": {"pass": 8, "fail": 0}, "phase2-function": {"pass": 3, "fail": 0}, "phase2-loops": {"pass": 4, "fail": 0}, "phase2-refs": {"pass": 6, "fail": 0}, - "phase3-adt": {"pass": 13, "fail": 0}, + "phase3-adt": {"pass": 19, "fail": 0}, "phase4-modules": {"pass": 12, "fail": 0}, - "phase5-hm": {"pass": 17, "fail": 0}, - "phase6-stdlib": {"pass": 29, "fail": 0}, + "phase5-hm": {"pass": 19, "fail": 0}, + "phase6-stdlib": {"pass": 39, "fail": 0}, "tokenize": {"pass": 18, "fail": 0} }, - "total_pass": 284, + "total_pass": 320, "total_fail": 0, - "total": 284 + "total": 320 } diff --git a/lib/ocaml/scoreboard.md b/lib/ocaml/scoreboard.md index f9f25839..11161d45 100644 --- a/lib/ocaml/scoreboard.md +++ b/lib/ocaml/scoreboard.md @@ -1,20 +1,20 @@ # OCaml-on-SX scoreboard -284 / 284 tests passing. +320 / 320 tests passing. | Suite | Pass | Fail | |---|---:|---:| -| eval-core | 47 | 0 | +| eval-core | 48 | 0 | | let-and | 3 | 0 | -| misc | 39 | 0 | -| parser | 85 | 0 | +| misc | 52 | 0 | +| parser | 87 | 0 | | phase1-params | 2 | 0 | -| phase2-exn | 6 | 0 | +| phase2-exn | 8 | 0 | | phase2-function | 3 | 0 | | phase2-loops | 4 | 0 | | phase2-refs | 6 | 0 | -| phase3-adt | 13 | 0 | +| phase3-adt | 19 | 0 | | phase4-modules | 12 | 0 | -| phase5-hm | 17 | 0 | -| phase6-stdlib | 29 | 0 | +| phase5-hm | 19 | 0 | +| phase6-stdlib | 39 | 0 | | tokenize | 18 | 0 | From 6d7197182e8fbcb1d8f4054f3c8db48c2af9f18f Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 12:54:15 +0000 Subject: [PATCH 027/298] ocaml: phase 5 HM tuple + list types (+7 tests, 326 total) Tuple type (hm-con "*" TYPES); list type (hm-con "list" (TYPE)). ocaml-infer-tuple threads substitution through each item left-to-right. ocaml-infer-list unifies all items with a fresh 'a (giving 'a list for empty []). Pretty-printer renders 'Int * Int' for tuples and 'Int list' for lists, matching standard OCaml notation. Examples: fun x y -> (x, y) : 'a -> 'b -> 'a * 'b fun x -> [x; x] : 'a -> 'a list [] : 'a list --- lib/ocaml/infer.sx | 58 ++++++++++++++++++++++++++++++++++++++++++++ lib/ocaml/test.sh | 25 +++++++++++++++++++ plans/ocaml-on-sx.md | 10 +++++++- 3 files changed, 92 insertions(+), 1 deletion(-) diff --git a/lib/ocaml/infer.sx b/lib/ocaml/infer.sx index 86cd4b39..a5b83c18 100644 --- a/lib/ocaml/infer.sx +++ b/lib/ocaml/infer.sx @@ -145,6 +145,56 @@ {:subst sf :type (hm-apply sf te)})))))))))))) +;; Tuple type: (hm-con "*" (list T1 T2 ...)). +(define ocaml-hm-tuple + (fn (types) (hm-con "*" types))) + +;; List type: (hm-con "list" (list ELEM)). +(define ocaml-hm-list + (fn (elem) (hm-con "list" (list elem)))) + +(define ocaml-infer-tuple + (fn (items env counter) + (let ((subst {}) (types (list))) + (begin + (define loop + (fn (xs env-cur) + (when (not (= xs (list))) + (let ((r (ocaml-infer (first xs) env-cur counter))) + (let ((s (get r :subst)) (t (get r :type))) + (begin + (set! subst (hm-compose s subst)) + (append! types t) + (loop (rest xs) (hm-apply-env s env-cur)))))))) + (loop items env) + {:subst subst + :type (ocaml-hm-tuple + (map (fn (t) (hm-apply subst t)) types))})))) + +(define ocaml-infer-list + (fn (items env counter) + (cond + ((= (len items) 0) + {:subst {} :type (ocaml-hm-list (hm-fresh-tv counter))}) + (else + (let ((subst {}) (elem-tv (hm-fresh-tv counter))) + (begin + (define loop + (fn (xs env-cur) + (when (not (= xs (list))) + (let ((r (ocaml-infer (first xs) env-cur counter))) + (let ((s (get r :subst)) (t (get r :type))) + (let ((s2 (ocaml-hm-unify + (hm-apply s elem-tv) + t + (hm-compose s subst)))) + (begin + (set! subst s2) + (loop (rest xs) (hm-apply-env s2 env-cur))))))))) + (loop items env) + {:subst subst + :type (ocaml-hm-list (hm-apply subst elem-tv))})))))) + (set! ocaml-infer (fn (expr env counter) (let ((tag (nth expr 0))) @@ -162,6 +212,8 @@ (nth expr 3) (nth expr 4) env counter)) ((= tag "if") (ocaml-infer-if (nth expr 1) (nth expr 2) (nth expr 3) env counter)) + ((= tag "tuple") (ocaml-infer-tuple (rest expr) env counter)) + ((= tag "list") (ocaml-infer-list (rest expr) env counter)) ((= tag "neg") (let ((r (ocaml-infer (nth expr 1) env counter))) (let ((s (get r :subst)) (t (get r :type))) @@ -205,5 +257,11 @@ (str "(" (ocaml-hm-format-type a) ")")) (else (ocaml-hm-format-type a))) " -> " (ocaml-hm-format-type b)))) + ((= head "*") + (let ((parts (map ocaml-hm-format-type args))) + (join " * " parts))) + ((= head "list") + (let ((elem (ocaml-hm-format-type (nth args 0)))) + (str elem " list"))) (else head)))) (else (str t))))) diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index e887b0c2..da64ab52 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -798,6 +798,22 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 1422) (eval "(ocaml-run \"Result.map_error (fun e -> e + 1) (Error 5)\")") +;; ── HM extensions: tuples + lists ────────────────────────────── +(epoch 1500) +(eval "(ocaml-type-of \"(1, 2)\")") +(epoch 1501) +(eval "(ocaml-type-of \"(1, true, \\\"hi\\\")\")") +(epoch 1502) +(eval "(ocaml-type-of \"[1; 2; 3]\")") +(epoch 1503) +(eval "(ocaml-type-of \"[]\")") +(epoch 1504) +(eval "(ocaml-type-of \"fun x -> [x; x]\")") +(epoch 1505) +(eval "(ocaml-type-of \"fun x y -> (x, y)\")") +(epoch 1506) +(eval "(ocaml-type-of \"[true; false]\")") + EPOCHS OUTPUT=$(timeout 180 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1263,6 +1279,15 @@ check 1420 "Result.get_ok" '42' check 1421 "Result.to_option Ok" '("Some" 1)' check 1422 "Result.map_error" '("Error" 6)' +# ── HM tuples + lists ─────────────────────────────────────────── +check 1500 "type tuple Int*Int" '"Int * Int"' +check 1501 "type 3-tuple" '"Int * Bool * String"' +check 1502 "type Int list" '"Int list"' +check 1503 "type [] poly" ' list' +check 1504 "type fn -> list" 'list"' +check 1505 "type fn -> tuple" ' * ' +check 1506 "type Bool list" '"Bool list"' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 75ad9e6a..cef10f0d 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -228,7 +228,8 @@ SX CEK evaluator (both JS and OCaml hosts) - [x] Type variables: `'a`, `'b`; unification with occur-check (kit). - [x] Let-polymorphism: generalise at let-bindings (kit `hm-generalize`). - [ ] ADT types: `type 'a option = None | Some of 'a`. -- [~] Function types `T1 -> T2` work; tuples/records pending. +- [~] Function types `T1 -> T2` work; tuples (`'a * 'b`) and lists + (`'a list`) supported. Records pending. - [ ] Type signatures: `val f : int -> int` — verify against inferred type. - [ ] Module type checking: seal against `sig` (Phase 4 stubs become real checks). - [ ] Error reporting: position-tagged errors with expected vs actual types. @@ -361,6 +362,13 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-08 Phase 5 — HM extensions for tuples and lists (+7 tests, + 326 total). Tuple type `(hm-con "*" TYPES)`, list type `(hm-con + "list" (TYPE))`. `ocaml-infer-tuple` threads substitution through + each item; `ocaml-infer-list` unifies all elements with a fresh + `'a` (giving `'a list` for `[]`). Pretty-printer renders `Int * Int` + and `Int list` like real OCaml. `fun x y -> (x, y) : 'a -> 'b -> 'a + * 'b`. `fun x -> [x; x] : 'a -> 'a list`. - 2026-05-08 Phase 6 — expanded stdlib slice (+15 tests, 319 total). List: concat/flatten, init, find/find_opt, partition, mapi/iteri, assoc/assoc_opt. Option: iter, fold, to_list. Result: get_ok, From 812aa75d430b20d9617ae21c7996bcc99958792f Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 12:57:22 +0000 Subject: [PATCH 028/298] ocaml: phase 6 Hashtbl (+6 tests, 332 total) Backing store is a one-element list cell holding a SX dict; keys coerced to strings via str so int/string keys work uniformly. API: create, add, replace, find, find_opt, mem, length. _hashtbl_create / _hashtbl_add / _hashtbl_replace / _hashtbl_find_opt / _hashtbl_mem / _hashtbl_length primitives wired in eval.sx; OCaml-side Hashtbl module wraps them in lib/ocaml/runtime.sx. --- lib/ocaml/eval.sx | 25 ++++++++++++++++++++++++- lib/ocaml/runtime.sx | 13 +++++++++++++ lib/ocaml/test.sh | 22 ++++++++++++++++++++++ plans/ocaml-on-sx.md | 7 +++++++ 4 files changed, 66 insertions(+), 1 deletion(-) diff --git a/lib/ocaml/eval.sx b/lib/ocaml/eval.sx index 72e22faa..7835dd48 100644 --- a/lib/ocaml/eval.sx +++ b/lib/ocaml/eval.sx @@ -66,7 +66,30 @@ ;; Print: prints to host stdout via println. (list "print_string" (fn (s) (begin (print s) nil))) (list "print_endline" (fn (s) (begin (println s) nil))) - (list "print_int" (fn (i) (begin (print (str i)) nil)))))) + (list "print_int" (fn (i) (begin (print (str i)) nil))) + ;; Hashtbl primitives (one-element list cell holding a dict). + ;; Keys are coerced to strings via `str` so any value type works + ;; as a key (matches Hashtbl's polymorphic-key semantics). + (list "_hashtbl_create" (fn (n) (list {}))) + (list "_hashtbl_add" + (fn (t) (fn (k) (fn (v) + (begin + (set-nth! t 0 (merge (nth t 0) (dict (str k) v))) + nil))))) + (list "_hashtbl_replace" + (fn (t) (fn (k) (fn (v) + (begin + (set-nth! t 0 (merge (nth t 0) (dict (str k) v))) + nil))))) + (list "_hashtbl_find_opt" + (fn (t) (fn (k) + (cond + ((has-key? (nth t 0) (str k)) (list "Some" (get (nth t 0) (str k)))) + (else (list "None")))))) + (list "_hashtbl_mem" + (fn (t) (fn (k) (has-key? (nth t 0) (str k))))) + (list "_hashtbl_length" + (fn (t) (len (keys (nth t 0)))))))) (define ocaml-env-lookup (fn (env name) diff --git a/lib/ocaml/runtime.sx b/lib/ocaml/runtime.sx index 701dfb2c..8ad9cdc4 100644 --- a/lib/ocaml/runtime.sx +++ b/lib/ocaml/runtime.sx @@ -266,6 +266,19 @@ module Printf = struct let sprintf fmt = fmt let printf fmt = print_string fmt + end ;; + + module Hashtbl = struct + let create n = _hashtbl_create n + let add t k v = _hashtbl_add t k v + let replace t k v = _hashtbl_replace t k v + let find_opt t k = _hashtbl_find_opt t k + let find t k = + match _hashtbl_find_opt t k with + | None -> failwith \"Hashtbl.find: not found\" + | Some v -> v + let mem t k = _hashtbl_mem t k + let length t = _hashtbl_length t end") (define ocaml-stdlib-loaded false) diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index da64ab52..31587ac2 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -814,6 +814,20 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 1506) (eval "(ocaml-type-of \"[true; false]\")") +;; ── Hashtbl ──────────────────────────────────────────────────── +(epoch 1600) +(eval "(ocaml-run-program \"let t = Hashtbl.create 10;; Hashtbl.add t \\\"x\\\" 42;; Hashtbl.find t \\\"x\\\"\")") +(epoch 1601) +(eval "(ocaml-run-program \"let t = Hashtbl.create 10;; Hashtbl.add t 1 \\\"a\\\";; Hashtbl.add t 2 \\\"b\\\";; Hashtbl.length t\")") +(epoch 1602) +(eval "(ocaml-run-program \"let t = Hashtbl.create 10;; Hashtbl.add t 1 100;; Hashtbl.find_opt t 99\")") +(epoch 1603) +(eval "(ocaml-run-program \"let t = Hashtbl.create 10;; Hashtbl.add t 1 100;; Hashtbl.mem t 1\")") +(epoch 1604) +(eval "(ocaml-run-program \"let t = Hashtbl.create 10;; Hashtbl.add t \\\"a\\\" 1;; Hashtbl.replace t \\\"a\\\" 2;; Hashtbl.find t \\\"a\\\"\")") +(epoch 1605) +(eval "(ocaml-run-program \"let t = Hashtbl.create 10;; Hashtbl.add t \\\"k\\\" 5;; Hashtbl.find_opt t \\\"k\\\"\")") + EPOCHS OUTPUT=$(timeout 180 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1288,6 +1302,14 @@ check 1504 "type fn -> list" 'list"' check 1505 "type fn -> tuple" ' * ' check 1506 "type Bool list" '"Bool list"' +# ── Hashtbl ───────────────────────────────────────────────────── +check 1600 "Hashtbl find" '42' +check 1601 "Hashtbl length" '2' +check 1602 "Hashtbl find_opt missing" '("None")' +check 1603 "Hashtbl mem" 'true' +check 1604 "Hashtbl replace" '2' +check 1605 "Hashtbl find_opt found" '("Some" 5)' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index cef10f0d..961e5b05 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -248,6 +248,9 @@ SX CEK evaluator (both JS and OCaml hosts) `iter`, `fold`, `to_list`. _(Pending: join/to_result.)_ - [~] `Result`: `map`, `bind`, `is_ok`, `is_error`, `get_ok`, `get_error`, `map_error`, `to_option`. _(Pending: fold/join.)_ +- [~] `Hashtbl`: `create`, `add`, `find`, `find_opt`, `replace`, `mem`, + `length`. Backed by a one-element list cell holding a SX dict; + keys coerced to strings via `str` for polymorphic-key support. - [~] `String`: `length`, `get`, `sub`, `concat`, `uppercase_ascii`, `lowercase_ascii`, `starts_with`. _(Pending: split_on_char, trim, contains, ends_with, index_opt, replace_all.)_ @@ -362,6 +365,10 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-08 Phase 6 — `Hashtbl` (+6 tests, 332 total). Backing store is + a one-element list cell holding a SX dict; keys are coerced to + strings via `str` so any value type can serve as a key. API: create, + add, replace, find, find_opt, mem, length. - 2026-05-08 Phase 5 — HM extensions for tuples and lists (+7 tests, 326 total). Tuple type `(hm-con "*" TYPES)`, list type `(hm-con "list" (TYPE))`. `ocaml-infer-tuple` threads substitution through From 202ea9cf5f5b7d7f9e6072ef277b2aa24dd362e2 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 12:59:50 +0000 Subject: [PATCH 029/298] ocaml: phase 6 List.sort + compare (+7 tests, 339 total) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit compare is a host builtin returning -1/0/1 (Stdlib.compare semantics) deferred to host SX . List.sort is insertion-sort in OCaml: O(n²) but works correctly. List.stable_sort = sort. Tested: ascending int sort, descending via custom comparator (b - a), empty list, string sort. --- lib/ocaml/eval.sx | 8 ++++++++ lib/ocaml/runtime.sx | 14 ++++++++++++++ lib/ocaml/test.sh | 25 +++++++++++++++++++++++++ plans/ocaml-on-sx.md | 9 +++++++-- 4 files changed, 54 insertions(+), 2 deletions(-) diff --git a/lib/ocaml/eval.sx b/lib/ocaml/eval.sx index 7835dd48..74032419 100644 --- a/lib/ocaml/eval.sx +++ b/lib/ocaml/eval.sx @@ -67,6 +67,14 @@ (list "print_string" (fn (s) (begin (print s) nil))) (list "print_endline" (fn (s) (begin (println s) nil))) (list "print_int" (fn (i) (begin (print (str i)) nil))) + ;; Polymorphic compare — returns negative / 0 / positive like + ;; OCaml's Stdlib.compare. Defers to host SX `<` and `>`. + (list "compare" + (fn (a) (fn (b) + (cond + ((< a b) -1) + ((> a b) 1) + (else 0))))) ;; Hashtbl primitives (one-element list cell holding a dict). ;; Keys are coerced to strings via `str` so any value type works ;; as a key (matches Hashtbl's polymorphic-key semantics). diff --git a/lib/ocaml/runtime.sx b/lib/ocaml/runtime.sx index 8ad9cdc4..28e101a7 100644 --- a/lib/ocaml/runtime.sx +++ b/lib/ocaml/runtime.sx @@ -143,6 +143,20 @@ match lst with | [] -> None | (k2, v) :: t -> if k = k2 then Some v else assoc_opt k t + + let rec sort cmp xs = + begin + let rec insert x ys = + match ys with + | [] -> [x] + | h :: t -> if cmp x h <= 0 then x :: ys else h :: insert x t + in + match xs with + | [] -> [] + | h :: t -> insert h (sort cmp t) + end + + let stable_sort = sort end ;; module Option = struct diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 31587ac2..03076b0b 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -828,6 +828,22 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 1605) (eval "(ocaml-run-program \"let t = Hashtbl.create 10;; Hashtbl.add t \\\"k\\\" 5;; Hashtbl.find_opt t \\\"k\\\"\")") +;; ── List.sort + compare ──────────────────────────────────────── +(epoch 1700) +(eval "(ocaml-run \"compare 1 2\")") +(epoch 1701) +(eval "(ocaml-run \"compare 5 5\")") +(epoch 1702) +(eval "(ocaml-run \"compare 9 1\")") +(epoch 1703) +(eval "(ocaml-run \"List.sort compare [3; 1; 4; 1; 5; 9; 2; 6]\")") +(epoch 1704) +(eval "(ocaml-run \"List.sort (fun a b -> b - a) [3; 1; 4]\")") +(epoch 1705) +(eval "(ocaml-run \"List.sort compare []\")") +(epoch 1706) +(eval "(ocaml-run \"List.sort compare [\\\"b\\\"; \\\"a\\\"; \\\"c\\\"]\")") + EPOCHS OUTPUT=$(timeout 180 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1310,6 +1326,15 @@ check 1603 "Hashtbl mem" 'true' check 1604 "Hashtbl replace" '2' check 1605 "Hashtbl find_opt found" '("Some" 5)' +# ── List.sort + compare ───────────────────────────────────────── +check 1700 "compare 1<2" '-1' +check 1701 "compare 5=5" '0' +check 1702 "compare 9>1" '1' +check 1703 "List.sort ascending" '(1 1 2 3 4 5 6 9)' +check 1704 "List.sort descending" '(4 3 1)' +check 1705 "List.sort empty" '()' +check 1706 "List.sort strings" '("a" "b" "c")' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 961e5b05..5ef6223c 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -242,8 +242,8 @@ SX CEK evaluator (both JS and OCaml hosts) - [~] `List`: `map`, `filter`, `fold_left`, `fold_right`, `length`, `rev`, `append`, `iter`, `for_all`, `exists`, `mem`, `nth`, `hd`, `tl`, `rev_append`, `concat`/`flatten`, `init`, `iteri`, `mapi`, `find`, - `find_opt`, `assoc`, `assoc_opt`, `partition`. _(Pending: - sort/stable_sort, combine, split.)_ + `find_opt`, `assoc`, `assoc_opt`, `partition`, `sort`, + `stable_sort` (insertion sort, O(n²)). _(Pending: combine, split.)_ - [~] `Option`: `map`, `bind`, `value`, `get`, `is_none`, `is_some`, `iter`, `fold`, `to_list`. _(Pending: join/to_result.)_ - [~] `Result`: `map`, `bind`, `is_ok`, `is_error`, `get_ok`, @@ -365,6 +365,11 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-08 Phase 6 — `List.sort` + polymorphic `compare` (+7 tests, + 339 total). `compare` is a host primitive that returns -1/0/1 like + Stdlib.compare, defers to host SX `<`/`>`. `List.sort` is implemented + in OCaml as insertion sort: O(n²) but correct, and passes all tests + including descending custom comparator and string sort. - 2026-05-08 Phase 6 — `Hashtbl` (+6 tests, 332 total). Backing store is a one-element list cell holding a SX dict; keys are coerced to strings via `str` so any value type can serve as a key. API: create, From d2bf0c0d003ebcfc7027b50ac44d433603e87409 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 13:02:15 +0000 Subject: [PATCH 030/298] ocaml: phase 5 HM pattern-match inference (+5 tests, 344 total) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ocaml-infer-pat covers :pwild, :pvar, :plit, :pcons, :plist, :ptuple, :pas. Returns {:type T :env ENV2 :subst S} where ENV2 has the pattern's bound names threaded through. ocaml-infer-match unifies each clause's pattern type with the scrutinee, runs the body in the env extended with pattern bindings, and unifies all body types via a fresh result tv. Examples: fun lst -> match lst with | [] -> 0 | h :: _ -> h : Int list -> Int match (1, 2) with | (a, b) -> a + b : Int Constructor patterns (:pcon) fall through to a fresh tv for now — proper handling needs a ctor type registry from 'type' declarations. --- lib/ocaml/infer.sx | 108 +++++++++++++++++++++++++++++++++++++++++++ lib/ocaml/test.sh | 19 ++++++++ plans/ocaml-on-sx.md | 8 ++++ 3 files changed, 135 insertions(+) diff --git a/lib/ocaml/infer.sx b/lib/ocaml/infer.sx index a5b83c18..212e80ee 100644 --- a/lib/ocaml/infer.sx +++ b/lib/ocaml/infer.sx @@ -171,6 +171,113 @@ :type (ocaml-hm-tuple (map (fn (t) (hm-apply subst t)) types))})))) +;; Pattern type inference. Returns {:type T :env ENV2 :subst S} where +;; ENV2 is the original env extended with any names the pattern binds. +;; Constructor patterns aren't supported here yet (need a type-def +;; registry) — :pcon falls through to a fresh tv so they don't break +;; inference of mixed clauses. +(define ocaml-infer-pat + (fn (pat env counter) + (let ((tag (nth pat 0))) + (cond + ((= tag "pwild") + (let ((tv (hm-fresh-tv counter))) + {:type tv :env env :subst {}})) + ((= tag "pvar") + (let ((nm (nth pat 1)) (tv (hm-fresh-tv counter))) + {:type tv :env (assoc env nm (hm-monotype tv)) :subst {}})) + ((= tag "plit") + (let ((r (ocaml-infer (nth pat 1) env counter))) + {:type (get r :type) :env env :subst (get r :subst)})) + ((= tag "pcons") + (let ((rh (ocaml-infer-pat (nth pat 1) env counter))) + (let ((rt (ocaml-infer-pat (nth pat 2) (get rh :env) counter))) + (let ((s (ocaml-hm-unify + (ocaml-hm-list (get rh :type)) + (get rt :type) + (hm-compose (get rt :subst) (get rh :subst))))) + {:type (hm-apply s (ocaml-hm-list (get rh :type))) + :env (get rt :env) + :subst s})))) + ((= tag "plist") + (let ((items (rest pat)) (tv (hm-fresh-tv counter)) (env-cur env) (subst {})) + (begin + (define loop + (fn (xs) + (when (not (= xs (list))) + (let ((rp (ocaml-infer-pat (first xs) env-cur counter))) + (let ((s (ocaml-hm-unify + (hm-apply (get rp :subst) tv) + (get rp :type) + (hm-compose (get rp :subst) subst)))) + (begin + (set! subst s) + (set! env-cur (get rp :env)) + (loop (rest xs)))))))) + (loop items) + {:type (hm-apply subst (ocaml-hm-list tv)) + :env env-cur + :subst subst}))) + ((= tag "ptuple") + (let ((items (rest pat)) (env-cur env) (subst {}) (types (list))) + (begin + (define loop + (fn (xs) + (when (not (= xs (list))) + (let ((rp (ocaml-infer-pat (first xs) env-cur counter))) + (begin + (set! subst (hm-compose (get rp :subst) subst)) + (append! types (get rp :type)) + (set! env-cur (get rp :env)) + (loop (rest xs))))))) + (loop items) + {:type (ocaml-hm-tuple + (map (fn (t) (hm-apply subst t)) types)) + :env env-cur + :subst subst}))) + ((= tag "pas") + (let ((rp (ocaml-infer-pat (nth pat 1) env counter))) + (let ((alias (nth pat 2))) + {:type (get rp :type) + :env (assoc (get rp :env) alias (hm-monotype (get rp :type))) + :subst (get rp :subst)}))) + (else + ;; :pcon and others — fall through to a fresh tv (sound but loose). + (let ((tv (hm-fresh-tv counter))) + {:type tv :env env :subst {}})))))) + +(define ocaml-infer-match + (fn (scrut clauses env counter) + (let ((rs (ocaml-infer scrut env counter))) + (let ((s (get rs :subst)) (st (get rs :type)) (result-tv (hm-fresh-tv counter))) + (let ((subst s)) + (begin + (define loop + (fn (cs) + (when (not (= cs (list))) + (let ((clause (first cs))) + (let ((ctag (nth clause 0))) + (let ((p (nth clause 1)) + (body (cond + ((= ctag "case") (nth clause 2)) + (else (nth clause 3))))) + (let ((rp (ocaml-infer-pat p (hm-apply-env subst env) counter))) + (let ((s1 (ocaml-hm-unify + (hm-apply (get rp :subst) st) + (get rp :type) + (hm-compose (get rp :subst) subst)))) + (let ((rb (ocaml-infer body + (hm-apply-env s1 (get rp :env)) counter))) + (let ((s2 (ocaml-hm-unify + (hm-apply (get rb :subst) result-tv) + (get rb :type) + (hm-compose (get rb :subst) s1)))) + (begin + (set! subst s2) + (loop (rest cs))))))))))))) + (loop clauses) + {:subst subst :type (hm-apply subst result-tv)})))))) + (define ocaml-infer-list (fn (items env counter) (cond @@ -214,6 +321,7 @@ (nth expr 3) env counter)) ((= tag "tuple") (ocaml-infer-tuple (rest expr) env counter)) ((= tag "list") (ocaml-infer-list (rest expr) env counter)) + ((= tag "match") (ocaml-infer-match (nth expr 1) (nth expr 2) env counter)) ((= tag "neg") (let ((r (ocaml-infer (nth expr 1) env counter))) (let ((s (get r :subst)) (t (get r :type))) diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 03076b0b..552b33c9 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -844,6 +844,18 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 1706) (eval "(ocaml-run \"List.sort compare [\\\"b\\\"; \\\"a\\\"; \\\"c\\\"]\")") +;; ── HM pattern-match inference ───────────────────────────────── +(epoch 1800) +(eval "(ocaml-type-of \"match 1 with | n -> n + 1\")") +(epoch 1801) +(eval "(ocaml-type-of \"match [1;2] with | [] -> 0 | h :: t -> h\")") +(epoch 1802) +(eval "(ocaml-type-of \"match (1, 2) with | (a, b) -> a + b\")") +(epoch 1803) +(eval "(ocaml-type-of \"fun x -> match x with | 0 -> 0 | n -> n + 1\")") +(epoch 1804) +(eval "(ocaml-type-of \"fun lst -> match lst with | [] -> 0 | h :: _ -> h\")") + EPOCHS OUTPUT=$(timeout 180 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1335,6 +1347,13 @@ check 1704 "List.sort descending" '(4 3 1)' check 1705 "List.sort empty" '()' check 1706 "List.sort strings" '("a" "b" "c")' +# ── HM match inference ────────────────────────────────────────── +check 1800 "match int" '"Int"' +check 1801 "match list" '"Int"' +check 1802 "match tuple" '"Int"' +check 1803 "fn match int -> int" '"Int -> Int"' +check 1804 "fn list -> elem" '"Int list -> Int"' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 5ef6223c..170c85e8 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -365,6 +365,14 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-08 Phase 5 — HM pattern-matching inference (+5 tests, 344 + total). `ocaml-infer-pat` covers wild, var, lit, cons, list, tuple, + as. `ocaml-infer-match` unifies each clause's pattern type with the + scrutinee, runs the body in the env extended with pattern-bound vars, + and unifies all body types via a fresh result tv. Examples: + `fun lst -> match lst with | [] -> 0 | h :: _ -> h : Int list -> Int`. + Constructor patterns fall through to a fresh tv for now (need a ctor + type registry from `type` decls — pending). - 2026-05-08 Phase 6 — `List.sort` + polymorphic `compare` (+7 tests, 339 total). `compare` is a host primitive that returns -1/0/1 like Stdlib.compare, defers to host SX `<`/`>`. `List.sort` is implemented From 81247eb6ea4c3ece22ca3dca556c65b69ed4b77e Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 13:05:22 +0000 Subject: [PATCH 031/298] ocaml: phase 5 HM ctor inference for option/result (+7 tests, 351 total) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ocaml-hm-ctor-env registers None/Some : 'a -> 'a option, Ok/Error : 'a -> ('a, 'b) result. :con NAME instantiates the scheme; :pcon NAME ARG-PATS walks arg patterns through the constructor's arrow type, unifying each. Pretty-printer renders 'Int option' and '(Int, 'b) result'. Examples now infer: fun x -> Some x : 'a -> 'a option match Some 5 with | None -> 0 | Some n -> n : Int fun o -> match o with | None -> 0 | Some n -> n : Int option -> Int Ok 1 : (Int, 'b) result Error "oops" : ('a, String) result User type-defs would extend the registry — pending. --- lib/ocaml/infer.sx | 73 +++++++++++++++++++++++++++++++++++++++++++- lib/ocaml/test.sh | 25 +++++++++++++++ plans/ocaml-on-sx.md | 13 +++++++- 3 files changed, 109 insertions(+), 2 deletions(-) diff --git a/lib/ocaml/infer.sx b/lib/ocaml/infer.sx index 212e80ee..081aab0d 100644 --- a/lib/ocaml/infer.sx +++ b/lib/ocaml/infer.sx @@ -22,6 +22,21 @@ (define ocaml-hm-empty-subst (fn () {})) +;; A registry of constructor types so :con / :pcon can be inferred. +;; OCaml's stdlib ctors are seeded here; user type-defs would extend +;; this in a future iteration. +(define ocaml-hm-ctor-env + (fn () + (let ((a (hm-tv "a")) (b (hm-tv "b"))) + (let ((opt-of-a (hm-con "option" (list a))) + (res-of-ab (hm-con "result" (list a b)))) + {"None" (hm-scheme (list "a") opt-of-a) + "Some" (hm-scheme (list "a") (hm-arrow a opt-of-a)) + "Ok" (hm-scheme (list "a" "b") (hm-arrow a res-of-ab)) + "Error" (hm-scheme (list "a" "b") (hm-arrow b res-of-ab)) + "true" (hm-monotype (hm-bool)) + "false" (hm-monotype (hm-bool))})))) + (define ocaml-hm-builtin-env (fn () (let ((int-int-int (hm-arrow (hm-int) (hm-arrow (hm-int) (hm-int)))) @@ -176,6 +191,42 @@ ;; Constructor patterns aren't supported here yet (need a type-def ;; registry) — :pcon falls through to a fresh tv so they don't break ;; inference of mixed clauses. +(define ocaml-infer-pcon + (fn (name arg-pats env counter) + (cond + ((has-key? ocaml-hm-ctors name) + (let ((ctor-type (hm-instantiate (get ocaml-hm-ctors name) counter)) + (env-cur env) (subst {})) + (let ((cur-type (list nil))) + (begin + (set-nth! cur-type 0 ctor-type) + (define loop + (fn (xs) + (when (not (= xs (list))) + (let ((rp (ocaml-infer-pat (first xs) env-cur counter))) + (let ((arg-tv (hm-fresh-tv counter)) + (res-tv (hm-fresh-tv counter))) + (let ((s1 (ocaml-hm-unify + (nth cur-type 0) + (hm-arrow arg-tv res-tv) + (hm-compose (get rp :subst) subst)))) + (let ((s2 (ocaml-hm-unify + (hm-apply s1 arg-tv) + (hm-apply s1 (get rp :type)) + s1))) + (begin + (set! subst s2) + (set-nth! cur-type 0 (hm-apply s2 res-tv)) + (set! env-cur (get rp :env)) + (loop (rest xs)))))))))) + (loop arg-pats) + {:type (hm-apply subst (nth cur-type 0)) + :env env-cur + :subst subst})))) + (else + (let ((tv (hm-fresh-tv counter))) + {:type tv :env env :subst {}}))))) + (define ocaml-infer-pat (fn (pat env counter) (let ((tag (nth pat 0))) @@ -241,8 +292,9 @@ {:type (get rp :type) :env (assoc (get rp :env) alias (hm-monotype (get rp :type))) :subst (get rp :subst)}))) + ((= tag "pcon") + (ocaml-infer-pcon (nth pat 1) (rest (rest pat)) env counter)) (else - ;; :pcon and others — fall through to a fresh tv (sound but loose). (let ((tv (hm-fresh-tv counter))) {:type tv :env env :subst {}})))))) @@ -302,10 +354,22 @@ {:subst subst :type (ocaml-hm-list (hm-apply subst elem-tv))})))))) +(define ocaml-hm-ctors (ocaml-hm-ctor-env)) + (set! ocaml-infer (fn (expr env counter) (let ((tag (nth expr 0))) (cond + ((= tag "con") + ;; (:con NAME) — look up constructor type, instantiate fresh. + (let ((name (nth expr 1))) + (cond + ((has-key? ocaml-hm-ctors name) + {:subst {} + :type (hm-instantiate (get ocaml-hm-ctors name) counter)}) + (else + ;; Unknown ctor — treat as a fresh polymorphic type. + {:subst {} :type (hm-fresh-tv counter)})))) ((= tag "int") {:subst {} :type (hm-int)}) ((= tag "float") {:subst {} :type (hm-int)}) ;; treat float as int for now ((= tag "string") {:subst {} :type (hm-string)}) @@ -371,5 +435,12 @@ ((= head "list") (let ((elem (ocaml-hm-format-type (nth args 0)))) (str elem " list"))) + ((= head "option") + (let ((elem (ocaml-hm-format-type (nth args 0)))) + (str elem " option"))) + ((= head "result") + (let ((a (ocaml-hm-format-type (nth args 0))) + (b (ocaml-hm-format-type (nth args 1)))) + (str "(" a ", " b ") result"))) (else head)))) (else (str t))))) diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 552b33c9..b1425436 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -856,6 +856,22 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 1804) (eval "(ocaml-type-of \"fun lst -> match lst with | [] -> 0 | h :: _ -> h\")") +;; ── HM constructor inference (option/result) ─────────────────── +(epoch 1900) +(eval "(ocaml-type-of \"Some 5\")") +(epoch 1901) +(eval "(ocaml-type-of \"None\")") +(epoch 1902) +(eval "(ocaml-type-of \"Ok 1\")") +(epoch 1903) +(eval "(ocaml-type-of \"Error \\\"oops\\\"\")") +(epoch 1904) +(eval "(ocaml-type-of \"fun x -> Some x\")") +(epoch 1905) +(eval "(ocaml-type-of \"match Some 5 with | None -> 0 | Some n -> n\")") +(epoch 1906) +(eval "(ocaml-type-of \"fun o -> match o with | None -> 0 | Some n -> n\")") + EPOCHS OUTPUT=$(timeout 180 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1354,6 +1370,15 @@ check 1802 "match tuple" '"Int"' check 1803 "fn match int -> int" '"Int -> Int"' check 1804 "fn list -> elem" '"Int list -> Int"' +# ── HM ctor inference ────────────────────────────────────────── +check 1900 "Some 5 : Int option" '"Int option"' +check 1901 "None : 'a option" ' option' +check 1902 "Ok 1 : (Int, 'b) result" '"(Int' +check 1903 "Error 'oops'" 'String) result' +check 1904 "fun x -> Some x" ' option' +check 1905 "match Some/None -> Int" '"Int"' +check 1906 "Int option -> Int" '"Int option -> Int"' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 170c85e8..c90f0701 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -227,7 +227,10 @@ SX CEK evaluator (both JS and OCaml hosts) pattern matching, let-rec, modules.)_ - [x] Type variables: `'a`, `'b`; unification with occur-check (kit). - [x] Let-polymorphism: generalise at let-bindings (kit `hm-generalize`). -- [ ] ADT types: `type 'a option = None | Some of 'a`. +- [~] ADT types: `option`/`result` ctors hardcoded in + `ocaml-hm-ctor-env`; `:con NAME` and `:pcon NAME …` look up the + scheme and instantiate. User type-defs would extend the registry. + Format: `Int option`, `(Int, 'b) result`. - [~] Function types `T1 -> T2` work; tuples (`'a * 'b`) and lists (`'a list`) supported. Records pending. - [ ] Type signatures: `val f : int -> int` — verify against inferred type. @@ -365,6 +368,14 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-08 Phase 5 — HM constructor inference for option/result (+7 + tests, 351 total). `ocaml-hm-ctor-env` registers None/Some (`'a opt`), + Ok/Error (`('a, 'b) result`). `:con NAME` instantiates the scheme; + `:pcon NAME ARG-PATS` walks arg patterns through the constructor's + arrow type, unifying each. Pretty-printer renders `Int option` and + `(Int, 'b) result`. Examples: + `fun x -> Some x : 'a -> 'a option` + `fun o -> match o with | None -> 0 | Some n -> n : Int option -> Int` - 2026-05-08 Phase 5 — HM pattern-matching inference (+5 tests, 344 total). `ocaml-infer-pat` covers wild, var, lit, cons, list, tuple, as. `ocaml-infer-match` unifies each clause's pattern type with the From 5bc7895ce0c74219767a57a670fc3ed5165aa0d1 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 13:08:51 +0000 Subject: [PATCH 032/298] ocaml: phase 5 HM let-rec + cons / append op types (+6 tests, 357 total) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ocaml-infer-let-rec pre-binds the function name to a fresh tv before inferring rhs (which may recursively call the name), unifies the inferred rhs type with the tv, generalizes, then infers body. Builtin env types :: : 'a -> 'a list -> 'a list and @ : 'a list -> 'a list -> 'a list — needed because :op compiles to (:app (:app (:var OP) L) R) and previously these var lookups failed. Examples now infer: let rec fact n = if ... in fact : Int -> Int let rec len lst = ... in len : 'a list -> Int let rec map f xs = ... in map : ('a -> 'b) -> 'a list -> 'b list 1 :: [2; 3] : Int list let rec sum lst = ... in sum [1;2;3] : Int Scoreboard refreshed: 358/358 across 14 suites. --- lib/ocaml/infer.sx | 38 +++++++++++++++++++++++++++++++++++++- lib/ocaml/scoreboard.json | 16 ++++++++-------- lib/ocaml/scoreboard.md | 14 +++++++------- lib/ocaml/test.sh | 22 ++++++++++++++++++++++ plans/ocaml-on-sx.md | 12 ++++++++++++ 5 files changed, 86 insertions(+), 16 deletions(-) diff --git a/lib/ocaml/infer.sx b/lib/ocaml/infer.sx index 081aab0d..211c880f 100644 --- a/lib/ocaml/infer.sx +++ b/lib/ocaml/infer.sx @@ -49,7 +49,19 @@ (hm-arrow a (hm-arrow a (hm-bool)))))) (a->a (let ((a (hm-tv "a"))) - (hm-scheme (list "a") (hm-arrow a a))))) + (hm-scheme (list "a") (hm-arrow a a)))) + (cons-type + (let ((a (hm-tv "a"))) + (hm-scheme (list "a") + (hm-arrow a + (hm-arrow (hm-con "list" (list a)) + (hm-con "list" (list a))))))) + (concat-type + (let ((a (hm-tv "a"))) + (hm-scheme (list "a") + (hm-arrow (hm-con "list" (list a)) + (hm-arrow (hm-con "list" (list a)) + (hm-con "list" (list a)))))))) {"+" (hm-monotype int-int-int) "-" (hm-monotype int-int-int) "*" (hm-monotype int-int-int) @@ -66,6 +78,8 @@ "&&" (hm-monotype bool-bool-bool) "||" (hm-monotype bool-bool-bool) "^" (hm-monotype str-str-str) + "::" cons-type + "@" concat-type "not" (hm-monotype (hm-arrow (hm-bool) (hm-bool))) "succ" (hm-monotype (hm-arrow (hm-int) (hm-int))) "pred" (hm-monotype (hm-arrow (hm-int) (hm-int))) @@ -142,6 +156,26 @@ (let ((s2 (get r2 :subst)) (t2 (get r2 :type))) {:subst (hm-compose s2 s1) :type t2})))))))))) +;; let-rec name params = rhs in body — bind name to a fresh tv before +;; inferring rhs, then unify the inferred rhs type with the tv. This +;; lets rhs reference name (recursive call). Generalize after. +(define ocaml-infer-let-rec + (fn (name params rhs body env counter) + (let ((rhs-expr (cond + ((= (len params) 0) rhs) + (else (list :fun params rhs)))) + (rec-tv (hm-fresh-tv counter))) + (let ((env-rec (assoc env name (hm-monotype rec-tv)))) + (let ((r1 (ocaml-infer rhs-expr env-rec counter))) + (let ((s1 (get r1 :subst)) (t1 (get r1 :type))) + (let ((s2 (ocaml-hm-unify (hm-apply s1 rec-tv) t1 s1))) + (let ((env2 (hm-apply-env s2 env))) + (let ((scheme (hm-generalize (hm-apply s2 t1) env2))) + (let ((env3 (assoc env2 name scheme))) + (let ((r2 (ocaml-infer body env3 counter))) + (let ((s3 (get r2 :subst)) (t2 (get r2 :type))) + {:subst (hm-compose s3 s2) :type t2})))))))))))) + (define ocaml-infer-if (fn (c-ast t-ast e-ast env counter) (let ((rc (ocaml-infer c-ast env counter))) @@ -381,6 +415,8 @@ ((= tag "app") (ocaml-infer-app (nth expr 1) (nth expr 2) env counter)) ((= tag "let") (ocaml-infer-let (nth expr 1) (nth expr 2) (nth expr 3) (nth expr 4) env counter)) + ((= tag "let-rec") (ocaml-infer-let-rec (nth expr 1) (nth expr 2) + (nth expr 3) (nth expr 4) env counter)) ((= tag "if") (ocaml-infer-if (nth expr 1) (nth expr 2) (nth expr 3) env counter)) ((= tag "tuple") (ocaml-infer-tuple (rest expr) env counter)) diff --git a/lib/ocaml/scoreboard.json b/lib/ocaml/scoreboard.json index 4c07725e..97cb623d 100644 --- a/lib/ocaml/scoreboard.json +++ b/lib/ocaml/scoreboard.json @@ -1,21 +1,21 @@ { "suites": { - "eval-core": {"pass": 48, "fail": 0}, + "eval-core": {"pass": 50, "fail": 0}, "let-and": {"pass": 3, "fail": 0}, - "misc": {"pass": 52, "fail": 0}, - "parser": {"pass": 87, "fail": 0}, + "misc": {"pass": 61, "fail": 0}, + "parser": {"pass": 93, "fail": 0}, "phase1-params": {"pass": 2, "fail": 0}, "phase2-exn": {"pass": 8, "fail": 0}, "phase2-function": {"pass": 3, "fail": 0}, "phase2-loops": {"pass": 4, "fail": 0}, "phase2-refs": {"pass": 6, "fail": 0}, - "phase3-adt": {"pass": 19, "fail": 0}, + "phase3-adt": {"pass": 24, "fail": 0}, "phase4-modules": {"pass": 12, "fail": 0}, - "phase5-hm": {"pass": 19, "fail": 0}, - "phase6-stdlib": {"pass": 39, "fail": 0}, + "phase5-hm": {"pass": 31, "fail": 0}, + "phase6-stdlib": {"pass": 43, "fail": 0}, "tokenize": {"pass": 18, "fail": 0} }, - "total_pass": 320, + "total_pass": 358, "total_fail": 0, - "total": 320 + "total": 358 } diff --git a/lib/ocaml/scoreboard.md b/lib/ocaml/scoreboard.md index 11161d45..00747ecb 100644 --- a/lib/ocaml/scoreboard.md +++ b/lib/ocaml/scoreboard.md @@ -1,20 +1,20 @@ # OCaml-on-SX scoreboard -320 / 320 tests passing. +358 / 358 tests passing. | Suite | Pass | Fail | |---|---:|---:| -| eval-core | 48 | 0 | +| eval-core | 50 | 0 | | let-and | 3 | 0 | -| misc | 52 | 0 | -| parser | 87 | 0 | +| misc | 61 | 0 | +| parser | 93 | 0 | | phase1-params | 2 | 0 | | phase2-exn | 8 | 0 | | phase2-function | 3 | 0 | | phase2-loops | 4 | 0 | | phase2-refs | 6 | 0 | -| phase3-adt | 19 | 0 | +| phase3-adt | 24 | 0 | | phase4-modules | 12 | 0 | -| phase5-hm | 19 | 0 | -| phase6-stdlib | 39 | 0 | +| phase5-hm | 31 | 0 | +| phase6-stdlib | 43 | 0 | | tokenize | 18 | 0 | diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index b1425436..7ebdec78 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -872,6 +872,20 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 1906) (eval "(ocaml-type-of \"fun o -> match o with | None -> 0 | Some n -> n\")") +;; ── HM let-rec inference + cons / append ────────────────────── +(epoch 2000) +(eval "(ocaml-type-of \"let rec fact n = if n = 0 then 1 else n * fact (n - 1) in fact\")") +(epoch 2001) +(eval "(ocaml-type-of \"let rec len lst = match lst with | [] -> 0 | _ :: t -> 1 + len t in len\")") +(epoch 2002) +(eval "(ocaml-type-of \"let rec map f xs = match xs with | [] -> [] | h :: t -> f h :: map f t in map\")") +(epoch 2003) +(eval "(ocaml-type-of \"1 :: [2; 3]\")") +(epoch 2004) +(eval "(ocaml-type-of \"[1] @ [2; 3]\")") +(epoch 2005) +(eval "(ocaml-type-of \"let rec sum lst = match lst with | [] -> 0 | h :: t -> h + sum t in sum [1; 2; 3]\")") + EPOCHS OUTPUT=$(timeout 180 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1379,6 +1393,14 @@ check 1904 "fun x -> Some x" ' option' check 1905 "match Some/None -> Int" '"Int"' check 1906 "Int option -> Int" '"Int option -> Int"' +# ── HM let-rec + :: / @ ───────────────────────────────────────── +check 2000 "type fact" '"Int -> Int"' +check 2001 "type len" 'list -> Int' +check 2002 "type map" 'list -> ' +check 2003 "type 1::list" '"Int list"' +check 2004 "type [1] @ [2;3]" '"Int list"' +check 2005 "type sum" '"Int"' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index c90f0701..ec4b930b 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -368,6 +368,18 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-08 Phase 5 — HM let-rec inference + `::`/`@` operator types + (+6 tests, 357 total). `ocaml-infer-let-rec` pre-binds the function + name to a fresh tv, infers rhs (which can recursively call name), + unifies inferred-rhs-type with the tv, generalizes, then infers + body. Builtin env now types `:: : 'a -> 'a list -> 'a list` and + `@ : 'a list -> 'a list -> 'a list`. Now `let rec fact …`, + `let rec map f xs = match xs with … h :: t -> f h :: map f t`, and + `let rec sum …` all infer correctly: + `fact : Int -> Int` + `len : 'a list -> Int` + `map : ('a -> 'b) -> 'a list -> 'b list` + `sum [1;2;3] : Int` - 2026-05-08 Phase 5 — HM constructor inference for option/result (+7 tests, 351 total). `ocaml-hm-ctor-env` registers None/Some (`'a opt`), Ok/Error (`('a, 'b) result`). `:con NAME` instantiates the scheme; From 756d5fba6412c88639dbbb023e3abe13469be90c Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 13:12:07 +0000 Subject: [PATCH 033/298] ocaml: phase 5 HM with user type declarations (+6 tests, 363 total) ocaml-hm-ctors is now a mutable list cell; user type-defs register their constructors via ocaml-hm-register-type-def!. New ocaml-type-of-program processes top-level decls in order: - type-def: register ctors with the scheme inferred from PARAMS+CTORS - def/def-rec: generalize and bind in the type env - exception-def: no-op for typing - expr: return inferred type Examples: type color = Red | Green | Blue;; Red : color type shape = Circle of int | Square of int;; let area s = match s with | Circle r -> r * r | Square s -> s * s;; area : shape -> Int Caveat: ctor arg types parsed as raw source strings; registry defaults to int for any single-arg ctor. Proper type-source parsing pending. --- lib/ocaml/infer.sx | 108 ++++++++++++++++++++++++++++++++++++++++--- lib/ocaml/test.sh | 22 +++++++++ plans/ocaml-on-sx.md | 22 +++++++-- 3 files changed, 141 insertions(+), 11 deletions(-) diff --git a/lib/ocaml/infer.sx b/lib/ocaml/infer.sx index 211c880f..c07bf1ef 100644 --- a/lib/ocaml/infer.sx +++ b/lib/ocaml/infer.sx @@ -228,8 +228,8 @@ (define ocaml-infer-pcon (fn (name arg-pats env counter) (cond - ((has-key? ocaml-hm-ctors name) - (let ((ctor-type (hm-instantiate (get ocaml-hm-ctors name) counter)) + ((ocaml-hm-ctor-has? name) + (let ((ctor-type (hm-instantiate (ocaml-hm-ctor-lookup name) counter)) (env-cur env) (subst {})) (let ((cur-type (list nil))) (begin @@ -388,21 +388,60 @@ {:subst subst :type (ocaml-hm-list (hm-apply subst elem-tv))})))))) -(define ocaml-hm-ctors (ocaml-hm-ctor-env)) +;; Mutable cell so user `type` declarations can extend the registry. +(define ocaml-hm-ctors (list (ocaml-hm-ctor-env))) + +(define ocaml-hm-ctor-lookup + (fn (name) (get (nth ocaml-hm-ctors 0) name))) + +(define ocaml-hm-ctor-has? + (fn (name) (has-key? (nth ocaml-hm-ctors 0) name))) + +(define ocaml-hm-ctor-register! + (fn (name scheme) + (set-nth! ocaml-hm-ctors 0 + (merge (nth ocaml-hm-ctors 0) (dict name scheme))))) + +;; Process a :type-def AST. For each ctor, build its scheme: +;; nullary `A` → scheme [] (con NAME [param-tvs...]) +;; ctor `B of int` → scheme [] (int -> (con NAME [...])) +;; Argument types are ignored for now (they're raw source strings) — we +;; assume `int`. A future iteration parses arg types properly. +(define ocaml-hm-register-type-def! + (fn (type-def) + (let ((name (nth type-def 1)) + (params (nth type-def 2)) + (ctors (nth type-def 3))) + (let ((param-tvs (map hm-tv params))) + (let ((self-type (hm-con name param-tvs))) + (begin + (define register-ctor + (fn (ctor) + (let ((cname (first ctor)) + (arg-srcs (rest ctor))) + (cond + ((= (len arg-srcs) 0) + (ocaml-hm-ctor-register! cname + (hm-scheme params self-type))) + (else + ;; Single-arg ctor with arg type `int` (placeholder). + ;; Multi-arg or other-typed ctors fall back to int. + (ocaml-hm-ctor-register! cname + (hm-scheme params + (hm-arrow (hm-int) self-type)))))))) + (for-each register-ctor ctors))))))) (set! ocaml-infer (fn (expr env counter) (let ((tag (nth expr 0))) (cond ((= tag "con") - ;; (:con NAME) — look up constructor type, instantiate fresh. (let ((name (nth expr 1))) (cond - ((has-key? ocaml-hm-ctors name) + ((ocaml-hm-ctor-has? name) {:subst {} - :type (hm-instantiate (get ocaml-hm-ctors name) counter)}) + :type (hm-instantiate (ocaml-hm-ctor-lookup name) counter)}) (else - ;; Unknown ctor — treat as a fresh polymorphic type. {:subst {} :type (hm-fresh-tv counter)})))) ((= tag "int") {:subst {} :type (hm-int)}) ((= tag "float") {:subst {} :type (hm-int)}) ;; treat float as int for now @@ -448,6 +487,61 @@ (let ((r (ocaml-infer expr env counter))) (ocaml-hm-format-type (hm-apply (get r :subst) (get r :type))))))) +;; Program-level type inference: process decls in order, registering +;; type-defs with the ctor registry, threading let-bindings into the +;; env, and returning the type of the last expression-level form. +(define ocaml-type-of-program + (fn (src) + (let ((prog (ocaml-parse-program src)) + (env (ocaml-hm-builtin-env)) + (counter (ocaml-hm-counter)) + (last-type (hm-tv "?"))) + (begin + (define run-decl + (fn (decl) + (let ((tag (nth decl 0))) + (cond + ((= tag "type-def") (ocaml-hm-register-type-def! decl)) + ((= tag "exception-def") nil) + ((= tag "def") + (let ((nm (nth decl 1)) (ps (nth decl 2)) (rh (nth decl 3))) + (let ((rhs-expr (cond + ((= (len ps) 0) rh) + (else (list :fun ps rh))))) + (let ((r (ocaml-infer rhs-expr env counter))) + (let ((s (get r :subst)) (t (get r :type))) + (let ((env2 (hm-apply-env s env))) + (let ((scheme (hm-generalize t env2))) + (begin + (set! env (assoc env2 nm scheme)) + (set! last-type t))))))))) + ((= tag "def-rec") + (let ((nm (nth decl 1)) (ps (nth decl 2)) (rh (nth decl 3))) + (let ((rec-tv (hm-fresh-tv counter))) + (let ((env-rec (assoc env nm (hm-monotype rec-tv))) + (rhs-expr (cond + ((= (len ps) 0) rh) + (else (list :fun ps rh))))) + (let ((r (ocaml-infer rhs-expr env-rec counter))) + (let ((s (get r :subst)) (t (get r :type))) + (let ((s2 (ocaml-hm-unify (hm-apply s rec-tv) t s))) + (let ((env2 (hm-apply-env s2 env))) + (let ((scheme (hm-generalize (hm-apply s2 t) env2))) + (begin + (set! env (assoc env2 nm scheme)) + (set! last-type t))))))))))) + ((= tag "expr") + (let ((r (ocaml-infer (nth decl 1) env counter))) + (set! last-type + (hm-apply (get r :subst) (get r :type))))) + (else nil))))) + (define loop + (fn (xs) + (when (not (= xs (list))) + (begin (run-decl (first xs)) (loop (rest xs)))))) + (loop (rest prog)) + (ocaml-hm-format-type last-type))))) + ;; Pretty-print a type as an OCaml-style string for testing. Only handles ;; the constructors we use: Int / Bool / String / Unit / -> / type-vars. (define ocaml-hm-format-type diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 7ebdec78..4a6ad7b0 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -886,6 +886,20 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 2005) (eval "(ocaml-type-of \"let rec sum lst = match lst with | [] -> 0 | h :: t -> h + sum t in sum [1; 2; 3]\")") +;; ── HM with user type declarations ───────────────────────────── +(epoch 2100) +(eval "(ocaml-type-of-program \"type color = Red | Green | Blue;; Red\")") +(epoch 2101) +(eval "(ocaml-type-of-program \"type shape = Circle of int | Square of int;; Circle 5\")") +(epoch 2102) +(eval "(ocaml-type-of-program \"type color = Red | Green;; let c = Red;; c\")") +(epoch 2103) +(eval "(ocaml-type-of-program \"type shape = Circle of int | Square of int;; let area s = match s with | Circle r -> r * r | Square s -> s * s;; area\")") +(epoch 2104) +(eval "(ocaml-type-of-program \"let x = 1;; let y = 2;; x + y\")") +(epoch 2105) +(eval "(ocaml-type-of-program \"let rec fact n = if n = 0 then 1 else n * fact (n - 1);; fact 5\")") + EPOCHS OUTPUT=$(timeout 180 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1401,6 +1415,14 @@ check 2003 "type 1::list" '"Int list"' check 2004 "type [1] @ [2;3]" '"Int list"' check 2005 "type sum" '"Int"' +# ── HM with user type-defs ────────────────────────────────────── +check 2100 "user type Red : color" '"color"' +check 2101 "user type Circle 5 : shape" '"shape"' +check 2102 "let c = Red; c" '"color"' +check 2103 "shape -> Int" '"shape -> Int"' +check 2104 "program x+y" '"Int"' +check 2105 "program fact 5" '"Int"' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index ec4b930b..328791eb 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -227,10 +227,12 @@ SX CEK evaluator (both JS and OCaml hosts) pattern matching, let-rec, modules.)_ - [x] Type variables: `'a`, `'b`; unification with occur-check (kit). - [x] Let-polymorphism: generalise at let-bindings (kit `hm-generalize`). -- [~] ADT types: `option`/`result` ctors hardcoded in - `ocaml-hm-ctor-env`; `:con NAME` and `:pcon NAME …` look up the - scheme and instantiate. User type-defs would extend the registry. - Format: `Int option`, `(Int, 'b) result`. +- [x] ADT types: `option`/`result` ctors seeded; + `ocaml-hm-register-type-def!` registers user types from `:type-def`. + `ocaml-type-of-program` threads decls through the env, registering + types and binding `let` schemes. `:con NAME` / `:pcon NAME …` + instantiate from the registry. _(Caveat: ctor arg types currently + default to `int` — proper type parsing pending.)_ - [~] Function types `T1 -> T2` work; tuples (`'a * 'b`) and lists (`'a list`) supported. Records pending. - [ ] Type signatures: `val f : int -> int` — verify against inferred type. @@ -368,6 +370,18 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-08 Phase 5 — HM with user `type` declarations (+6 tests, 363 + total). `ocaml-hm-ctors` is now a mutable list cell; user type-defs + register their constructors via `ocaml-hm-register-type-def!`. New + `ocaml-type-of-program` processes top-level decls in order: type-def + registers ctors, def/def-rec generalize, exception-def is a no-op, + expr returns its inferred type. Examples: + `type color = Red | Green | Blue;; Red : color` + `type shape = Circle of int | Square of int;; let area s = match s + with | Circle r -> r * r | Square s -> s * s;; area : shape -> Int` + Caveat: ctor arg types parsed as raw source strings; the registry + defaults to `int` for any single-arg ctor. Proper type-source parsing + is pending. - 2026-05-08 Phase 5 — HM let-rec inference + `::`/`@` operator types (+6 tests, 357 total). `ocaml-infer-let-rec` pre-binds the function name to a fresh tv, infers rhs (which can recursively call name), From de7be332c8346819f775ff5e9ea43ad024316c63 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 13:33:24 +0000 Subject: [PATCH 034/298] ocaml: phase 5.1 baseline OCaml programs (5/5 pass) + lookahead boundary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit lib/ocaml/baseline/{factorial,list_ops,option_match,module_use,sum_squares}.ml exercised through ocaml-run-program (file-read F). lib/ocaml/baseline/ run.sh runs them and compares against expected.json — all 5 pass. To make module_use.ml (with nested let-in) parse, parser's skip-let-rhs-boundary! now uses has-matching-in? lookahead: a let at depth 0 in a let-decl rhs opens a nested block IFF a matching in exists before any decl-keyword. Without that in, the let is a new top-level decl (preserves test 274 'let x = 1 let y = 2'). This is the first piece of Phase 5.1 'vendor a slice of OCaml testsuite' — handcrafted fixtures for now, real testsuite TBD. --- lib/ocaml/baseline/expected.json | 7 +++ lib/ocaml/baseline/factorial.ml | 4 ++ lib/ocaml/baseline/list_ops.ml | 5 ++ lib/ocaml/baseline/module_use.ml | 14 +++++ lib/ocaml/baseline/option_match.ml | 8 +++ lib/ocaml/baseline/run.sh | 75 +++++++++++++++++++++++ lib/ocaml/baseline/sum_squares.ml | 6 ++ lib/ocaml/parser.sx | 96 ++++++++++++++++++++++++++---- plans/ocaml-on-sx.md | 19 ++++-- 9 files changed, 217 insertions(+), 17 deletions(-) create mode 100644 lib/ocaml/baseline/expected.json create mode 100644 lib/ocaml/baseline/factorial.ml create mode 100644 lib/ocaml/baseline/list_ops.ml create mode 100644 lib/ocaml/baseline/module_use.ml create mode 100644 lib/ocaml/baseline/option_match.ml create mode 100755 lib/ocaml/baseline/run.sh create mode 100644 lib/ocaml/baseline/sum_squares.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json new file mode 100644 index 00000000..f5c089ad --- /dev/null +++ b/lib/ocaml/baseline/expected.json @@ -0,0 +1,7 @@ +{ + "factorial.ml": 3628800, + "list_ops.ml": 30, + "option_match.ml": 5, + "module_use.ml": 3, + "sum_squares.ml": 385 +} diff --git a/lib/ocaml/baseline/factorial.ml b/lib/ocaml/baseline/factorial.ml new file mode 100644 index 00000000..e2234fff --- /dev/null +++ b/lib/ocaml/baseline/factorial.ml @@ -0,0 +1,4 @@ +(* Baseline: factorial via let-rec *) +let rec fact n = + if n = 0 then 1 else n * fact (n - 1) ;; +fact 10 diff --git a/lib/ocaml/baseline/list_ops.ml b/lib/ocaml/baseline/list_ops.ml new file mode 100644 index 00000000..fd1d23c7 --- /dev/null +++ b/lib/ocaml/baseline/list_ops.ml @@ -0,0 +1,5 @@ +(* Baseline: List functions exercise *) +let xs = [1; 2; 3; 4; 5] ;; +let doubled = List.map (fun x -> x * 2) xs ;; +let total = List.fold_left (fun a b -> a + b) 0 doubled ;; +total diff --git a/lib/ocaml/baseline/module_use.ml b/lib/ocaml/baseline/module_use.ml new file mode 100644 index 00000000..25e33331 --- /dev/null +++ b/lib/ocaml/baseline/module_use.ml @@ -0,0 +1,14 @@ +(* Baseline: module declaration + use *) +module Counter = struct + let make () = + let n = ref 0 in + fun () -> + n := !n + 1 ; + !n +end ;; +let result = + let c = Counter.make () in + let _ = c () in + let _ = c () in + c () ;; +result diff --git a/lib/ocaml/baseline/option_match.ml b/lib/ocaml/baseline/option_match.ml new file mode 100644 index 00000000..a2d0f864 --- /dev/null +++ b/lib/ocaml/baseline/option_match.ml @@ -0,0 +1,8 @@ +(* Baseline: option type + pattern matching *) +let safe_div a b = + if b = 0 then None else Some (a / b) ;; +let result = + match safe_div 20 4 with + | None -> 0 + | Some x -> x ;; +result diff --git a/lib/ocaml/baseline/run.sh b/lib/ocaml/baseline/run.sh new file mode 100755 index 00000000..516d855c --- /dev/null +++ b/lib/ocaml/baseline/run.sh @@ -0,0 +1,75 @@ +#!/usr/bin/env bash +# lib/ocaml/baseline/run.sh — run each baseline OCaml program through +# ocaml-run-program and compare to expected.json. + +set -uo pipefail +cd "$(git rev-parse --show-toplevel)" + +SX_SERVER="${SX_SERVER:-hosts/ocaml/_build/default/bin/sx_server.exe}" +if [ ! -x "$SX_SERVER" ]; then + SX_SERVER="/root/rose-ash/hosts/ocaml/_build/default/bin/sx_server.exe" +fi + +PASS=0 +FAIL=0 +ERRORS="" + +for f in lib/ocaml/baseline/*.ml; do + name=$(basename "$f") + expected=$(grep -oE "\"$name\"[[:space:]]*:[[:space:]]*[0-9-]+" lib/ocaml/baseline/expected.json | sed -E 's/.*:[[:space:]]*//') + if [ -z "$expected" ]; then + continue + fi + + TMP=$(mktemp) + cat > "$TMP" << EPOCHS +(epoch 1) +(load "lib/guest/lex.sx") +(load "lib/guest/prefix.sx") +(load "lib/guest/pratt.sx") +(load "lib/ocaml/tokenizer.sx") +(load "lib/ocaml/parser.sx") +(load "lib/ocaml/eval.sx") +(load "lib/ocaml/runtime.sx") +(eval "(ocaml-load-stdlib!)") +(epoch 2) +(eval "(ocaml-run-program (file-read \\"$f\\"))") +EPOCHS + + output=$(timeout 60 "$SX_SERVER" < "$TMP" 2>/dev/null | grep -E '^\(ok-len 2|^\(ok 2' | head -1) + rm -f "$TMP" + + # Pull the next line which has the value + result=$(timeout 60 "$SX_SERVER" < <(cat </dev/null | awk '/^\(ok-len 2 / {getline; print; exit} /^\(ok 2 / {sub(/^\(ok 2 /, ""); sub(/\)$/, ""); print; exit}') + + if [ "$result" = "$expected" ]; then + PASS=$((PASS + 1)) + echo " ok $name → $result" + else + FAIL=$((FAIL + 1)) + ERRORS+=" FAIL $name expected=$expected got=$result +" + fi +done + +TOTAL=$((PASS + FAIL)) +if [ $FAIL -eq 0 ]; then + echo "ok $PASS/$TOTAL baseline OCaml programs run correctly" +else + echo "FAIL $PASS/$TOTAL baseline programs" + echo "$ERRORS" +fi +[ $FAIL -eq 0 ] diff --git a/lib/ocaml/baseline/sum_squares.ml b/lib/ocaml/baseline/sum_squares.ml new file mode 100644 index 00000000..a1a17778 --- /dev/null +++ b/lib/ocaml/baseline/sum_squares.ml @@ -0,0 +1,6 @@ +(* Baseline: imperative loop summing squares *) +let total = ref 0 ;; +for i = 1 to 10 do + total := !total + i * i +done ;; +!total diff --git a/lib/ocaml/parser.sx b/lib/ocaml/parser.sx index b85dba56..63e691a8 100644 --- a/lib/ocaml/parser.sx +++ b/lib/ocaml/parser.sx @@ -895,12 +895,88 @@ (fn () (let ((t (peek-tok))) (if (= t nil) (len src) (get t :pos))))) - ;; skip-to-boundary! advances `idx` to the next top-level decl - ;; boundary, tracking `let`/`begin`/`struct` etc. nesting so that - ;; an inner `let ... in ...` doesn't terminate a top-level decl - ;; body. Boundary tokens (when at depth 0): - ;; ;; let module open include and type exception - ;; Boundary at any depth: eof. + ;; Two flavors of boundary skipping: + ;; + ;; * `skip-to-decl-boundary!` — used by parse-decl-expr. Stops + ;; at the start of the next top-level decl: ;;, let, module, + ;; open, include, and, type, exception, or eof. + ;; + ;; * `skip-let-rhs-boundary!` — used inside parse-decl-let after + ;; the `=`. Treats `let` as the opener of a nested let..in + ;; block (NOT a decl boundary), so `let f x = let y = 0 in y` + ;; parses correctly. Boundary tokens (depth 0): ;;, module, + ;; open, include, and, type, exception, or eof. + ;; Lookahead: starting just past a `let` at the cursor, scan + ;; for a matching `in` before the next decl boundary. Returns + ;; true iff such an `in` exists — meaning the let is nested, + ;; not a new decl. + (define has-matching-in? + (fn () + (let ((p (+ idx 1)) (d 1) (result false) (done false)) + (begin + (define scan + (fn () + (when (not done) + (cond + ((>= p tok-len) (set! done true)) + (else + (let ((t (nth tokens p))) + (let ((tt (ocaml-tok-type t)) (tv (ocaml-tok-value t))) + (cond + ((= tt "eof") (set! done true)) + ((and (= tt "op") (= tv ";;")) (set! done true)) + ((and (= tt "keyword") (= tv "module")) (set! done true)) + ((and (= tt "keyword") (= tv "type")) (set! done true)) + ((and (= tt "keyword") (= tv "exception")) (set! done true)) + ((and (= tt "keyword") (= tv "open")) (set! done true)) + ((and (= tt "keyword") (= tv "include")) (set! done true)) + ((and (= tt "keyword") (= tv "and")) (set! done true)) + ((and (= tt "keyword") (= tv "let")) + (begin (set! d (+ d 1)) (set! p (+ p 1)) (scan))) + ((and (= tt "keyword") (= tv "in")) + (cond + ((= d 1) (begin (set! result true) (set! done true))) + (else + (begin (set! d (- d 1)) (set! p (+ p 1)) (scan))))) + (else (begin (set! p (+ p 1)) (scan))))))))))) + (scan) + result)))) + + ;; Same as skip-to-boundary but treats inner `let` as the start + ;; of a nested let..in (open depth) IF a matching `in` exists + ;; before any decl boundary; otherwise stops. + (define + skip-let-rhs-boundary! + (fn () + (let ((depth 0)) + (begin + (define step + (fn () + (cond + ((>= idx tok-len) nil) + ((= (ocaml-tok-type (peek-tok)) "eof") nil) + ((and (= depth 0) (at-op? ";;")) nil) + ((and (= depth 0) (at-kw? "module")) nil) + ((and (= depth 0) (at-kw? "open")) nil) + ((and (= depth 0) (at-kw? "include")) nil) + ((and (= depth 0) (at-kw? "and")) nil) + ((and (= depth 0) (at-kw? "type")) nil) + ((and (= depth 0) (at-kw? "exception")) nil) + ((and (= depth 0) (at-kw? "let")) + (cond + ((has-matching-in?) + (begin (set! depth (+ depth 1)) (advance-tok!) (step))) + (else nil))) + ((or (at-kw? "let") (at-kw? "begin") (at-kw? "struct") + (at-kw? "sig") (at-kw? "for") (at-kw? "while")) + (begin (set! depth (+ depth 1)) (advance-tok!) (step))) + ((or (at-kw? "in") (at-kw? "end") (at-kw? "done")) + (begin + (when (> depth 0) (set! depth (- depth 1))) + (advance-tok!) (step))) + (else (begin (advance-tok!) (step)))))) + (step))))) + (define skip-to-boundary! (fn () @@ -919,12 +995,6 @@ ((and (= depth 0) (at-kw? "and")) nil) ((and (= depth 0) (at-kw? "type")) nil) ((and (= depth 0) (at-kw? "exception")) nil) - ;; Track nested blocks that have explicit closing - ;; tokens. let..in / begin..end / struct..end / - ;; sig..end / for..done / while..done. `if`/`match`/ - ;; `try` don't have hard close tokens so we don't - ;; track them — their bodies are bounded by the - ;; surrounding expression structure. ((or (at-kw? "let") (at-kw? "begin") (at-kw? "struct") (at-kw? "sig") (at-kw? "for") (at-kw? "while")) (begin (set! depth (+ depth 1)) (advance-tok!) (step))) @@ -968,7 +1038,7 @@ (consume! "op" "=") (let ((expr-start (cur-pos))) (begin - (skip-to-boundary!) + (skip-let-rhs-boundary!) (let ((expr-src (slice src expr-start (cur-pos)))) (let ((expr (ocaml-parse expr-src))) (append! bindings (list nm ps expr)))))))))) diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 328791eb..8ad74a6f 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -214,10 +214,12 @@ SX CEK evaluator (both JS and OCaml hosts) eval-core, phase2-refs, phase2-loops, phase2-function, phase2-exn, phase3-adt, phase4-modules, phase5-hm, phase6-stdlib, let-and, phase1-params, misc), and emits `scoreboard.json` + `scoreboard.md`. -- [ ] Vendor a slice of the OCaml testsuite at `lib/ocaml/baseline/` - and feed it through `ocaml-run-program`, scoring per-file - conformance. _(Pending — needs more stdlib coverage and ADT type - decls to make most testsuite files runnable.)_ +- [~] Baseline OCaml programs at `lib/ocaml/baseline/` exercised through + `ocaml-run-program`. Currently 5/5: factorial.ml (recursion), + list_ops.ml (List.map + fold_left), option_match.ml (option + + pattern match), module_use.ml (module + ref + closure + + sequenced calls), sum_squares.ml (for-loop + ref). Real OCaml + testsuite vendoring is the next step. ### Phase 5 — Hindley-Milner type inference @@ -370,6 +372,15 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-08 Phase 5.1 — `lib/ocaml/baseline/` with five sample OCaml + programs (.ml files), driven by `lib/ocaml/baseline/run.sh` through + `ocaml-run-program (file-read F)`. All 5/5 pass: factorial, + list_ops, option_match, module_use (module + ref + closure + + sequenced calls), sum_squares (for-loop). To make module_use parse, + parser's `skip-let-rhs-boundary!` now lookaheads for a matching `in` + before any decl-keyword — distinguishes nested let-in from a new + top-level decl. Test 274 (`let x = 1 let y = 2`) still works because + its body has no inner `in`. - 2026-05-08 Phase 5 — HM with user `type` declarations (+6 tests, 363 total). `ocaml-hm-ctors` is now a mutable list cell; user type-defs register their constructors via `ocaml-hm-register-type-def!`. New From 46d0eb258ee9a4a4496147db6488d627037ec64a Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 13:44:28 +0000 Subject: [PATCH 035/298] =?UTF-8?q?ocaml:=20phase=205.1=20baseline=208/8?= =?UTF-8?q?=20=E2=80=94=20quicksort=20+=20exceptions=20+=20closures?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Added 3 baseline programs: - closures.ml — curried make_adder; verifies closure capture - quicksort.ml — recursive sort using List.filter + List.append, sums result - exception_handle.ml — exception NegArg of int + raise + try/with All 8/8 baseline programs pass through ocaml-run-program. Combined the suite exercises: let-rec, modules, refs, for-loops, pattern matching, exceptions, lambdas, list ops (map/filter/append/fold), arithmetic. run.sh streamlined to one sx_server invocation per program. End-to-end runtime ≈2 min. --- lib/ocaml/baseline/closures.ml | 5 +++++ lib/ocaml/baseline/exception_handle.ml | 17 ++++++++++++++ lib/ocaml/baseline/expected.json | 5 ++++- lib/ocaml/baseline/quicksort.ml | 10 +++++++++ lib/ocaml/baseline/run.sh | 31 +++++++++----------------- plans/ocaml-on-sx.md | 7 ++++++ 6 files changed, 53 insertions(+), 22 deletions(-) create mode 100644 lib/ocaml/baseline/closures.ml create mode 100644 lib/ocaml/baseline/exception_handle.ml create mode 100644 lib/ocaml/baseline/quicksort.ml diff --git a/lib/ocaml/baseline/closures.ml b/lib/ocaml/baseline/closures.ml new file mode 100644 index 00000000..628dd9c3 --- /dev/null +++ b/lib/ocaml/baseline/closures.ml @@ -0,0 +1,5 @@ +(* Baseline: closures + curried application *) +let make_adder n = fun x -> n + x ;; +let add5 = make_adder 5 ;; +let add10 = make_adder 10 ;; +add5 100 + add10 200 diff --git a/lib/ocaml/baseline/exception_handle.ml b/lib/ocaml/baseline/exception_handle.ml new file mode 100644 index 00000000..4f2b7993 --- /dev/null +++ b/lib/ocaml/baseline/exception_handle.ml @@ -0,0 +1,17 @@ +(* Baseline: exception declaration + raise + try-with *) +exception NegArg of int ;; +let safe_sqrt n = + if n < 0 then raise (NegArg n) + else + begin + let rec find_sqrt i = + if i * i > n then i - 1 + else find_sqrt (i + 1) + in find_sqrt 0 + end ;; +let result = + try + safe_sqrt 16 + with + | NegArg _ -> 0 ;; +result diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index f5c089ad..167d68c1 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -1,7 +1,10 @@ { + "closures.ml": 315, + "exception_handle.ml": 4, "factorial.ml": 3628800, "list_ops.ml": 30, - "option_match.ml": 5, "module_use.ml": 3, + "option_match.ml": 5, + "quicksort.ml": 44, "sum_squares.ml": 385 } diff --git a/lib/ocaml/baseline/quicksort.ml b/lib/ocaml/baseline/quicksort.ml new file mode 100644 index 00000000..8b49889c --- /dev/null +++ b/lib/ocaml/baseline/quicksort.ml @@ -0,0 +1,10 @@ +(* Baseline: quicksort over a list, returns sum of sorted result *) +let rec quicksort lst = + match lst with + | [] -> [] + | pivot :: rest -> + let smaller = List.filter (fun x -> x < pivot) rest in + let larger = List.filter (fun x -> x >= pivot) rest in + List.append (quicksort smaller) (pivot :: quicksort larger) ;; +let sorted = quicksort [3; 1; 4; 1; 5; 9; 2; 6; 5; 3; 5] ;; +List.fold_left (fun a b -> a + b) 0 sorted diff --git a/lib/ocaml/baseline/run.sh b/lib/ocaml/baseline/run.sh index 516d855c..6e62e688 100755 --- a/lib/ocaml/baseline/run.sh +++ b/lib/ocaml/baseline/run.sh @@ -22,25 +22,7 @@ for f in lib/ocaml/baseline/*.ml; do fi TMP=$(mktemp) - cat > "$TMP" << EPOCHS -(epoch 1) -(load "lib/guest/lex.sx") -(load "lib/guest/prefix.sx") -(load "lib/guest/pratt.sx") -(load "lib/ocaml/tokenizer.sx") -(load "lib/ocaml/parser.sx") -(load "lib/ocaml/eval.sx") -(load "lib/ocaml/runtime.sx") -(eval "(ocaml-load-stdlib!)") -(epoch 2) -(eval "(ocaml-run-program (file-read \\"$f\\"))") -EPOCHS - - output=$(timeout 60 "$SX_SERVER" < "$TMP" 2>/dev/null | grep -E '^\(ok-len 2|^\(ok 2' | head -1) - rm -f "$TMP" - - # Pull the next line which has the value - result=$(timeout 60 "$SX_SERVER" < <(cat < "$TMP" << EOF (epoch 1) (load "lib/guest/lex.sx") (load "lib/guest/prefix.sx") @@ -52,8 +34,15 @@ EPOCHS (eval "(ocaml-load-stdlib!)") (epoch 2) (eval "(ocaml-run-program (file-read \"$f\"))") -EPOCHS -) 2>/dev/null | awk '/^\(ok-len 2 / {getline; print; exit} /^\(ok 2 / {sub(/^\(ok 2 /, ""); sub(/\)$/, ""); print; exit}') +EOF + + output=$(timeout 60 "$SX_SERVER" < "$TMP" 2>/dev/null) + rm -f "$TMP" + + result=$(echo "$output" | awk ' + /^\(ok-len 2 / { getline; print; exit } + /^\(ok 2 [^)]+\)$/ { sub(/^\(ok 2 /, ""); sub(/\)$/, ""); print; exit } + ') if [ "$result" = "$expected" ]; then PASS=$((PASS + 1)) diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 8ad74a6f..6a279534 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -372,6 +372,13 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-08 Phase 5.1 — baseline expanded to 8 programs (8/8 pass). + Added: closures.ml (curried adders), quicksort.ml (recursive sort + on lists), exception_handle.ml (exception decl + raise + try/with). + All 8 programs together exercise let-rec, modules, refs, for-loops, + pattern matching, exceptions, lambdas, list functions, arithmetic. + Run.sh streamlined to one sx_server invocation per program (was + two). End-to-end runtime ≈2 min for the suite. - 2026-05-08 Phase 5.1 — `lib/ocaml/baseline/` with five sample OCaml programs (.ml files), driven by `lib/ocaml/baseline/run.sh` through `ocaml-run-program (file-read F)`. All 5/5 pass: factorial, From dac9cf124fc8f069e603dba2826e4138ede8c508 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 13:45:29 +0000 Subject: [PATCH 036/298] ocaml: refresh scoreboard (364/364 across 14 suites) --- lib/ocaml/scoreboard.json | 10 +++++----- lib/ocaml/scoreboard.md | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/lib/ocaml/scoreboard.json b/lib/ocaml/scoreboard.json index 97cb623d..5dece3f2 100644 --- a/lib/ocaml/scoreboard.json +++ b/lib/ocaml/scoreboard.json @@ -2,8 +2,8 @@ "suites": { "eval-core": {"pass": 50, "fail": 0}, "let-and": {"pass": 3, "fail": 0}, - "misc": {"pass": 61, "fail": 0}, - "parser": {"pass": 93, "fail": 0}, + "misc": {"pass": 62, "fail": 0}, + "parser": {"pass": 95, "fail": 0}, "phase1-params": {"pass": 2, "fail": 0}, "phase2-exn": {"pass": 8, "fail": 0}, "phase2-function": {"pass": 3, "fail": 0}, @@ -11,11 +11,11 @@ "phase2-refs": {"pass": 6, "fail": 0}, "phase3-adt": {"pass": 24, "fail": 0}, "phase4-modules": {"pass": 12, "fail": 0}, - "phase5-hm": {"pass": 31, "fail": 0}, + "phase5-hm": {"pass": 34, "fail": 0}, "phase6-stdlib": {"pass": 43, "fail": 0}, "tokenize": {"pass": 18, "fail": 0} }, - "total_pass": 358, + "total_pass": 364, "total_fail": 0, - "total": 358 + "total": 364 } diff --git a/lib/ocaml/scoreboard.md b/lib/ocaml/scoreboard.md index 00747ecb..c32a550f 100644 --- a/lib/ocaml/scoreboard.md +++ b/lib/ocaml/scoreboard.md @@ -1,13 +1,13 @@ # OCaml-on-SX scoreboard -358 / 358 tests passing. +364 / 364 tests passing. | Suite | Pass | Fail | |---|---:|---:| | eval-core | 50 | 0 | | let-and | 3 | 0 | -| misc | 61 | 0 | -| parser | 93 | 0 | +| misc | 62 | 0 | +| parser | 95 | 0 | | phase1-params | 2 | 0 | | phase2-exn | 8 | 0 | | phase2-function | 3 | 0 | @@ -15,6 +15,6 @@ | phase2-refs | 6 | 0 | | phase3-adt | 24 | 0 | | phase4-modules | 12 | 0 | -| phase5-hm | 31 | 0 | +| phase5-hm | 34 | 0 | | phase6-stdlib | 43 | 0 | | tokenize | 18 | 0 | From 16df48ff74c0c228afcf9eff47f3a529c1a3aaed Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 13:48:48 +0000 Subject: [PATCH 037/298] ocaml: phase 6 List.combine/split/iter2/fold_left2/map2 (+4, 367 total) Mechanical pair-walk OCaml implementations. failwith on length mismatch matches Stdlib semantics. List module now covers 30+ functions. --- lib/ocaml/runtime.sx | 41 +++++++++++++++++++++++++++++++++++++++++ lib/ocaml/test.sh | 16 ++++++++++++++++ plans/ocaml-on-sx.md | 7 ++++++- 3 files changed, 63 insertions(+), 1 deletion(-) diff --git a/lib/ocaml/runtime.sx b/lib/ocaml/runtime.sx index 28e101a7..3d754646 100644 --- a/lib/ocaml/runtime.sx +++ b/lib/ocaml/runtime.sx @@ -157,6 +157,47 @@ end let stable_sort = sort + + let rec combine xs ys = + match xs with + | [] -> (match ys with + | [] -> [] + | _ -> failwith \"List.combine: unequal lengths\") + | hx :: tx -> + match ys with + | [] -> failwith \"List.combine: unequal lengths\" + | hy :: ty -> (hx, hy) :: combine tx ty + + let rec split lst = + match lst with + | [] -> ([], []) + | (a, b) :: t -> + (match split t with + | (xs, ys) -> (a :: xs, b :: ys)) + + let rec fold_left2 f acc xs ys = + match xs with + | [] -> (match ys with [] -> acc | _ -> failwith \"List.fold_left2: unequal\") + | hx :: tx -> + match ys with + | [] -> failwith \"List.fold_left2: unequal\" + | hy :: ty -> fold_left2 f (f acc hx hy) tx ty + + let rec iter2 f xs ys = + match xs with + | [] -> (match ys with [] -> () | _ -> failwith \"List.iter2: unequal\") + | hx :: tx -> + match ys with + | [] -> failwith \"List.iter2: unequal\" + | hy :: ty -> f hx hy; iter2 f tx ty + + let rec map2 f xs ys = + match xs with + | [] -> (match ys with [] -> [] | _ -> failwith \"List.map2: unequal\") + | hx :: tx -> + match ys with + | [] -> failwith \"List.map2: unequal\" + | hy :: ty -> f hx hy :: map2 f tx ty end ;; module Option = struct diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 4a6ad7b0..23c0d5d1 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -900,6 +900,16 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 2105) (eval "(ocaml-type-of-program \"let rec fact n = if n = 0 then 1 else n * fact (n - 1);; fact 5\")") +;; ── More List functions: combine/split/iter2/fold_left2/map2 ── +(epoch 2200) +(eval "(ocaml-run \"List.combine [1;2;3] [\\\"a\\\";\\\"b\\\";\\\"c\\\"]\")") +(epoch 2201) +(eval "(ocaml-run \"List.split [(1,\\\"a\\\");(2,\\\"b\\\")]\")") +(epoch 2202) +(eval "(ocaml-run \"List.fold_left2 (fun a b c -> a + b + c) 0 [1;2;3] [10;20;30]\")") +(epoch 2203) +(eval "(ocaml-run \"List.map2 (fun a b -> a + b) [1;2;3] [10;20;30]\")") + EPOCHS OUTPUT=$(timeout 180 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1423,6 +1433,12 @@ check 2103 "shape -> Int" '"shape -> Int"' check 2104 "program x+y" '"Int"' check 2105 "program fact 5" '"Int"' +# ── More List functions ───────────────────────────────────────── +check 2200 "List.combine" '("tuple" 3 "c")' +check 2201 "List.split" '("tuple" (1 2) ("a" "b"))' +check 2202 "List.fold_left2" '66' +check 2203 "List.map2" '(11 22 33)' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 6a279534..1abc3ab5 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -250,7 +250,8 @@ SX CEK evaluator (both JS and OCaml hosts) `append`, `iter`, `for_all`, `exists`, `mem`, `nth`, `hd`, `tl`, `rev_append`, `concat`/`flatten`, `init`, `iteri`, `mapi`, `find`, `find_opt`, `assoc`, `assoc_opt`, `partition`, `sort`, - `stable_sort` (insertion sort, O(n²)). _(Pending: combine, split.)_ + `stable_sort`, `combine`, `split`, `iter2`, `fold_left2`, `map2`. + 30+ functions covered. - [~] `Option`: `map`, `bind`, `value`, `get`, `is_none`, `is_some`, `iter`, `fold`, `to_list`. _(Pending: join/to_result.)_ - [~] `Result`: `map`, `bind`, `is_ok`, `is_error`, `get_ok`, @@ -372,6 +373,10 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-08 Phase 6 — List.combine/split/iter2/fold_left2/map2 (+4 + tests, 367 total). Mechanical pair-walk OCaml implementations, + failwith on length-mismatch matching Stdlib semantics. List module + now covers 30+ functions. - 2026-05-08 Phase 5.1 — baseline expanded to 8 programs (8/8 pass). Added: closures.ml (curried adders), quicksort.ml (recursive sort on lists), exception_handle.ml (exception decl + raise + try/with). From ee002f2e027761983ab6f1ac8573632f148a7005 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 13:55:04 +0000 Subject: [PATCH 038/298] ocaml: phase 1/5/6 float arithmetic +./-./*./. (+5 tests, 372 total) Tokenizer: +. -. *. /. (with -. avoiding clash with negative float literals). Parser table places dotted ops at int-precedence levels. Eval routes to host SX +/-/*//. HM types them Float->Float->Float; literal floats now infer as Float (was Int). OCaml-style 1.5 +. 2.5 : Float works end-to-end through tokenize + parse + eval + infer. --- lib/ocaml/eval.sx | 4 ++++ lib/ocaml/infer.sx | 13 ++++++++++++- lib/ocaml/parser.sx | 4 ++++ lib/ocaml/test.sh | 19 +++++++++++++++++++ lib/ocaml/tokenizer.sx | 8 ++++++++ plans/ocaml-on-sx.md | 6 ++++++ 6 files changed, 53 insertions(+), 1 deletion(-) diff --git a/lib/ocaml/eval.sx b/lib/ocaml/eval.sx index 74032419..d7843211 100644 --- a/lib/ocaml/eval.sx +++ b/lib/ocaml/eval.sx @@ -328,6 +328,10 @@ ((= op "-") (- lhs rhs)) ((= op "*") (* lhs rhs)) ((= op "/") (/ lhs rhs)) + ((= op "+.") (+ lhs rhs)) + ((= op "-.") (- lhs rhs)) + ((= op "*.") (* lhs rhs)) + ((= op "/.") (/ lhs rhs)) ((= op "mod") (mod lhs rhs)) ((= op "%") (mod lhs rhs)) ((= op "**") (pow lhs rhs)) diff --git a/lib/ocaml/infer.sx b/lib/ocaml/infer.sx index c07bf1ef..a37c5c59 100644 --- a/lib/ocaml/infer.sx +++ b/lib/ocaml/infer.sx @@ -37,9 +37,15 @@ "true" (hm-monotype (hm-bool)) "false" (hm-monotype (hm-bool))})))) +;; Float type isn't in the kit; use a named ctor. +(define ocaml-hm-float (fn () (hm-con "Float" (list)))) + (define ocaml-hm-builtin-env (fn () (let ((int-int-int (hm-arrow (hm-int) (hm-arrow (hm-int) (hm-int)))) + (float-float-float + (hm-arrow (ocaml-hm-float) + (hm-arrow (ocaml-hm-float) (ocaml-hm-float)))) (int-int-bool (hm-arrow (hm-int) (hm-arrow (hm-int) (hm-bool)))) (bool-bool-bool (hm-arrow (hm-bool) (hm-arrow (hm-bool) (hm-bool)))) (str-str-str (hm-arrow (hm-string) (hm-arrow (hm-string) (hm-string)))) @@ -66,6 +72,10 @@ "-" (hm-monotype int-int-int) "*" (hm-monotype int-int-int) "/" (hm-monotype int-int-int) + "+." (hm-monotype float-float-float) + "-." (hm-monotype float-float-float) + "*." (hm-monotype float-float-float) + "/." (hm-monotype float-float-float) "mod" (hm-monotype int-int-int) "%" (hm-monotype int-int-int) "**" (hm-monotype int-int-int) @@ -444,7 +454,7 @@ (else {:subst {} :type (hm-fresh-tv counter)})))) ((= tag "int") {:subst {} :type (hm-int)}) - ((= tag "float") {:subst {} :type (hm-int)}) ;; treat float as int for now + ((= tag "float") {:subst {} :type (ocaml-hm-float)}) ((= tag "string") {:subst {} :type (hm-string)}) ((= tag "char") {:subst {} :type (hm-string)}) ((= tag "bool") {:subst {} :type (hm-bool)}) @@ -572,5 +582,6 @@ (let ((a (ocaml-hm-format-type (nth args 0))) (b (ocaml-hm-format-type (nth args 1)))) (str "(" a ", " b ") result"))) + ((= head "Float") "Float") (else head)))) (else (str t))))) diff --git a/lib/ocaml/parser.sx b/lib/ocaml/parser.sx index 63e691a8..9dc08026 100644 --- a/lib/ocaml/parser.sx +++ b/lib/ocaml/parser.sx @@ -67,8 +67,12 @@ (list "::" 6 :right) (list "+" 7 :left) (list "-" 7 :left) + (list "+." 7 :left) + (list "-." 7 :left) (list "*" 8 :left) (list "/" 8 :left) + (list "*." 8 :left) + (list "/." 8 :left) (list "%" 8 :left) (list "mod" 8 :left) (list "land" 8 :left) diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 23c0d5d1..e01147c5 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -910,6 +910,18 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 2203) (eval "(ocaml-run \"List.map2 (fun a b -> a + b) [1;2;3] [10;20;30]\")") +;; ── Float arithmetic ─────────────────────────────────────────── +(epoch 2300) +(eval "(ocaml-run \"1.5 +. 2.5\")") +(epoch 2301) +(eval "(ocaml-run \"3.0 *. 2.0\")") +(epoch 2302) +(eval "(ocaml-run \"10.0 /. 4.0\")") +(epoch 2303) +(eval "(ocaml-type-of \"1.5 +. 2.5\")") +(epoch 2304) +(eval "(ocaml-type-of \"fun x y -> x +. y\")") + EPOCHS OUTPUT=$(timeout 180 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1439,6 +1451,13 @@ check 2201 "List.split" '("tuple" (1 2) ("a" "b"))' check 2202 "List.fold_left2" '66' check 2203 "List.map2" '(11 22 33)' +# ── Float arithmetic ──────────────────────────────────────────── +check 2300 "1.5 +. 2.5" '4' +check 2301 "3.0 *. 2.0" '6' +check 2302 "10.0 /. 4.0" '2.5' +check 2303 "type 1.5 +. 2.5" '"Float"' +check 2304 "type fun x y -> x +. y" '"Float -> Float -> Float"' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/lib/ocaml/tokenizer.sx b/lib/ocaml/tokenizer.sx index d3882aab..245b83f3 100644 --- a/lib/ocaml/tokenizer.sx +++ b/lib/ocaml/tokenizer.sx @@ -294,6 +294,14 @@ (cond ((and (= c ";") (= c1 ";")) (begin (advance! 2) (push! "op" ";;" start) true)) + ((and (= c "+") (= c1 ".")) + (begin (advance! 2) (push! "op" "+." start) true)) + ((and (= c "-") (= c1 ".") (not (and (not (= c2 nil)) (ocaml-digit? c2)))) + (begin (advance! 2) (push! "op" "-." start) true)) + ((and (= c "*") (= c1 ".")) + (begin (advance! 2) (push! "op" "*." start) true)) + ((and (= c "/") (= c1 ".")) + (begin (advance! 2) (push! "op" "/." start) true)) ((and (= c "-") (= c1 ">")) (begin (advance! 2) (push! "op" "->" start) true)) ((and (= c "<") (= c1 "-")) diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 1abc3ab5..7df43b9a 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -373,6 +373,12 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-08 Phase 1+5+6 — Float arithmetic (`+.` `-.` `*.` `/.`) + (+5 tests, 372 total). Tokenizer recognises the dotted operators. + Parser table places them at int's level (7 / 8). Eval routes them + to host SX `+`/`-`/`*`/`/` (which works for both ints and floats). + HM types them `Float -> Float -> Float`; `1.5 +. 2.5 : Float`. + Float type added to formatter as a plain `Float` ctor. - 2026-05-08 Phase 6 — List.combine/split/iter2/fold_left2/map2 (+4 tests, 367 total). Mechanical pair-walk OCaml implementations, failwith on length-mismatch matching Stdlib semantics. List module From 986b15c0e52947e335243655c3f224d7dae83d6f Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 13:58:52 +0000 Subject: [PATCH 039/298] ocaml: phase 6 Float module: sqrt/sin/cos/pow/floor/ceil/round/pi (+6 tests, 378 total) Wraps host SX math primitives via _float_* builtins. Float.pi is a Float literal in the OCaml-side module. --- lib/ocaml/eval.sx | 8 ++++++++ lib/ocaml/runtime.sx | 8 ++++++++ lib/ocaml/test.sh | 22 ++++++++++++++++++++++ plans/ocaml-on-sx.md | 6 +++++- 4 files changed, 43 insertions(+), 1 deletion(-) diff --git a/lib/ocaml/eval.sx b/lib/ocaml/eval.sx index d7843211..b92da38f 100644 --- a/lib/ocaml/eval.sx +++ b/lib/ocaml/eval.sx @@ -67,6 +67,14 @@ (list "print_string" (fn (s) (begin (print s) nil))) (list "print_endline" (fn (s) (begin (println s) nil))) (list "print_int" (fn (i) (begin (print (str i)) nil))) + ;; Float math primitives. + (list "_float_sqrt" (fn (x) (sqrt x))) + (list "_float_sin" (fn (x) (sin x))) + (list "_float_cos" (fn (x) (cos x))) + (list "_float_pow" (fn (a) (fn (b) (pow a b)))) + (list "_float_floor" (fn (x) (floor x))) + (list "_float_ceil" (fn (x) (ceil x))) + (list "_float_round" (fn (x) (round x))) ;; Polymorphic compare — returns negative / 0 / positive like ;; OCaml's Stdlib.compare. Defers to host SX `<` and `>`. (list "compare" diff --git a/lib/ocaml/runtime.sx b/lib/ocaml/runtime.sx index 3d754646..0c9b3e0e 100644 --- a/lib/ocaml/runtime.sx +++ b/lib/ocaml/runtime.sx @@ -316,6 +316,14 @@ module Float = struct let to_string f = _string_of_float f + let sqrt f = _float_sqrt f + let sin f = _float_sin f + let cos f = _float_cos f + let pow a b = _float_pow a b + let floor f = _float_floor f + let ceil f = _float_ceil f + let round f = _float_round f + let pi = 3.141592653589793 end ;; module Printf = struct diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index e01147c5..0351d0f3 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -922,6 +922,20 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 2304) (eval "(ocaml-type-of \"fun x y -> x +. y\")") +;; ── Float module ─────────────────────────────────────────────── +(epoch 2400) +(eval "(ocaml-run \"Float.sqrt 16.0\")") +(epoch 2401) +(eval "(ocaml-run \"Float.sin 0.0\")") +(epoch 2402) +(eval "(ocaml-run \"Float.cos 0.0\")") +(epoch 2403) +(eval "(ocaml-run \"Float.pow 2.0 10.0\")") +(epoch 2404) +(eval "(ocaml-run \"Float.floor 3.7\")") +(epoch 2405) +(eval "(ocaml-run \"Float.ceil 3.2\")") + EPOCHS OUTPUT=$(timeout 180 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1458,6 +1472,14 @@ check 2302 "10.0 /. 4.0" '2.5' check 2303 "type 1.5 +. 2.5" '"Float"' check 2304 "type fun x y -> x +. y" '"Float -> Float -> Float"' +# ── Float module ──────────────────────────────────────────────── +check 2400 "Float.sqrt 16" '4' +check 2401 "Float.sin 0" '0' +check 2402 "Float.cos 0" '1' +check 2403 "Float.pow 2 10" '1024' +check 2404 "Float.floor 3.7" '3' +check 2405 "Float.ceil 3.2" '4' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 7df43b9a..2d34bf31 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -266,7 +266,8 @@ SX CEK evaluator (both JS and OCaml hosts) _(Pending: escaped.)_ - [~] `Int`: `to_string`, `of_string`, `abs`, `max`, `min`. _(Pending: arithmetic helpers, min_int/max_int.)_ -- [~] `Float`: `to_string`. _(Pending: of_string, arithmetic helpers.)_ +- [~] `Float`: `to_string`, `sqrt`, `sin`, `cos`, `pow`, `floor`, + `ceil`, `round`, `pi`. _(Pending: of_string.)_ - [~] `Printf`: stub `sprintf`/`printf`. _(Real format-string interpretation pending.)_ - [ ] `String`: `length`, `get`, `sub`, `concat`, `split_on_char`, `trim`, @@ -373,6 +374,9 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-08 Phase 6 — Float module: sqrt/sin/cos/pow/floor/ceil/round + + pi constant (+6 tests, 378 total). Wraps host SX math primitives + via `_float_*` builtins. - 2026-05-08 Phase 1+5+6 — Float arithmetic (`+.` `-.` `*.` `/.`) (+5 tests, 372 total). Tokenizer recognises the dotted operators. Parser table places them at int's level (7 / 8). Eval routes them From 9f539ab392ce8edde6947d925097d84b28c39127 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 14:03:09 +0000 Subject: [PATCH 040/298] ocaml: phase 3 polymorphic variants (+4 tests, 382 total) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tokenizer recognises backtick followed by an upper ident, emitting a ctor token identical to a nominal ctor. Parser and evaluator treat polyvariants as ctors — same tagged-list runtime. So: `Red -> ("Red") `Some 42 -> ("Some" 42) match `Red with | `Red -> 1 | `Green -> 2 | `Blue -> 3 -> 1 `Pair (1,2) -> ("Pair" 1 2) (with tuple-arg flatten) Proper row types in HM deferred. --- lib/ocaml/test.sh | 16 ++++++++++++++++ lib/ocaml/tokenizer.sx | 14 ++++++++++++++ plans/ocaml-on-sx.md | 11 ++++++++++- 3 files changed, 40 insertions(+), 1 deletion(-) diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 0351d0f3..413f0645 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -936,6 +936,16 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 2405) (eval "(ocaml-run \"Float.ceil 3.2\")") +;; ── Polymorphic variants ────────────────────────────────────── +(epoch 2500) +(eval "(ocaml-run \"\`Red\")") +(epoch 2501) +(eval "(ocaml-run \"\`Some 42\")") +(epoch 2502) +(eval "(ocaml-run \"match \`Red with | \`Red -> 1 | \`Green -> 2 | \`Blue -> 3\")") +(epoch 2503) +(eval "(ocaml-run \"match \`Pair (1, 2) with | \`Pair (a, b) -> a + b\")") + EPOCHS OUTPUT=$(timeout 180 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1480,6 +1490,12 @@ check 2403 "Float.pow 2 10" '1024' check 2404 "Float.floor 3.7" '3' check 2405 "Float.ceil 3.2" '4' +# ── Polymorphic variants ─────────────────────────────────────── +check 2500 "polyvar Red" '("Red")' +check 2501 "polyvar Some 42" '("Some" 42)' +check 2502 "polyvar match" '1' +check 2503 "polyvar Pair (a,b)" '3' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/lib/ocaml/tokenizer.sx b/lib/ocaml/tokenizer.sx index 245b83f3..3949a556 100644 --- a/lib/ocaml/tokenizer.sx +++ b/lib/ocaml/tokenizer.sx @@ -381,6 +381,20 @@ (slice src (+ start 1) pos) start) (step))) + ;; Polymorphic variant tag: `Tag — emits a ctor token + ;; identical to a nominal ctor. Runtime is dynamic, so + ;; the distinction only matters for HM (deferred). + ((and (= c "`") + (< (+ pos 1) src-len) + (ocaml-upper? (ocaml-peek 1))) + (begin + (advance! 1) + (let ((ctor-start pos)) + (begin + (when (and (< pos src-len) (ocaml-ident-char? (cur))) + (begin (advance! 1) (read-ident ctor-start))) + (push! "ctor" (slice src ctor-start pos) start))) + (step))) ((try-punct start) (step)) (else (error diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 2d34bf31..44e30220 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -182,7 +182,10 @@ SX CEK evaluator (both JS and OCaml hosts) `(:exception-def NAME [ARG-TYPE-SRC])`. Runtime no-op since raise/match work on tagged ctor values. Built-ins: `Failure`/`Invalid_argument` via `failwith`/`invalid_arg`. -- [ ] Polymorphic variants (surface syntax `\`Tag value`; runtime same tagged list). +- [x] Polymorphic variants (surface syntax `` `Tag value ``; runtime same + tagged list as nominal ctors). Tokenizer recognises backtick + ctor; + parser/eval treat them identically to nominal ctors. Type system + handling deferred (proper row types). - [ ] Tests in `lib/ocaml/tests/adt.sx` — 40+ tests: ADTs, match, option/result. ### Phase 4 — Modules + functors @@ -374,6 +377,12 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-08 Phase 3 — polymorphic variants `` `Tag `` (+4 tests, 382 + total). Tokenizer recognises backtick followed by an upper ident, + tokenizing identically to nominal ctors. Parser and evaluator treat + them as ctors — same tagged-list runtime. Match patterns `` `Red `` + / `` `Pair (a, b) `` work without any extra wiring. Proper row + types in HM deferred. - 2026-05-08 Phase 6 — Float module: sqrt/sin/cos/pow/floor/ceil/round + pi constant (+6 tests, 378 total). Wraps host SX math primitives via `_float_*` builtins. From 0cf5c8f2197d3c60d96c63bef06ab30afe0e8e7c Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 15:01:04 +0000 Subject: [PATCH 041/298] ocaml: phase 5.1 expr_eval.ml baseline (9/9 pass) A tiny arithmetic-expression evaluator using: type expr = Lit of int | Add of expr*expr | Mul of expr*expr | Neg of expr let rec eval e = match e with | Lit n -> n | Add (a,b) -> ... Exercises type-decl + multi-arg ctor + recursive match end-to-end. Per-program timeout in run.sh bumped to 120s. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/expr_eval.ml | 19 +++++++++++++++++++ lib/ocaml/baseline/run.sh | 2 +- plans/ocaml-on-sx.md | 5 +++++ 4 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 lib/ocaml/baseline/expr_eval.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 167d68c1..4d328b0d 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -1,6 +1,7 @@ { "closures.ml": 315, "exception_handle.ml": 4, + "expr_eval.ml": 16, "factorial.ml": 3628800, "list_ops.ml": 30, "module_use.ml": 3, diff --git a/lib/ocaml/baseline/expr_eval.ml b/lib/ocaml/baseline/expr_eval.ml new file mode 100644 index 00000000..59ee0e43 --- /dev/null +++ b/lib/ocaml/baseline/expr_eval.ml @@ -0,0 +1,19 @@ +(* Baseline: a tiny expression evaluator using ADTs + match *) +type expr = + | Lit of int + | Add of expr * expr + | Mul of expr * expr + | Neg of expr +;; + +let rec eval e = + match e with + | Lit n -> n + | Add (a, b) -> eval a + eval b + | Mul (a, b) -> eval a * eval b + | Neg x -> 0 - eval x +;; + +(* (1 + 2) * (3 + 4) - 5 = 21 - 5 = 16 *) +eval + (Add (Mul (Add (Lit 1, Lit 2), Add (Lit 3, Lit 4)), Neg (Lit 5))) diff --git a/lib/ocaml/baseline/run.sh b/lib/ocaml/baseline/run.sh index 6e62e688..8d5e75e7 100755 --- a/lib/ocaml/baseline/run.sh +++ b/lib/ocaml/baseline/run.sh @@ -36,7 +36,7 @@ for f in lib/ocaml/baseline/*.ml; do (eval "(ocaml-run-program (file-read \"$f\"))") EOF - output=$(timeout 60 "$SX_SERVER" < "$TMP" 2>/dev/null) + output=$(timeout 120 "$SX_SERVER" < "$TMP" 2>/dev/null) rm -f "$TMP" result=$(echo "$output" | awk ' diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 44e30220..9e5ba3b5 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -377,6 +377,11 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-08 Phase 5.1 — expr_eval.ml baseline (9/9 pass). A tiny + arithmetic-expression evaluator using ADT (`type expr = Lit | Add | + Mul | Neg`) + recursive eval + pattern match — exercises the full + type-decl + ctor + match pipeline end-to-end. Per-program timeout + bumped to 120s in run.sh. - 2026-05-08 Phase 3 — polymorphic variants `` `Tag `` (+4 tests, 382 total). Tokenizer recognises backtick followed by an upper ident, tokenizing identically to nominal ctors. Parser and evaluator treat From 98049d545831d9705827d45116fe13142763d787 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 15:06:44 +0000 Subject: [PATCH 042/298] ocaml: phase 1+3 record patterns { f = pat } (+4 tests, 386 total) Parser: { f1 = pat; f2 = pat; ... } in pattern position emits (:precord (FIELDNAME PAT)...). Mixed with the existing { in expression position via the at-pattern-atom? whitelist. Eval: :precord matches against a dict; required fields must be present and each pat must match the field's value. Can mix literal+var: 'match { x = 1; y = y } with | { x = 1; y = y } -> y' matches only when x is 1. --- lib/ocaml/eval.sx | 27 +++++++++++++++++++++++++++ lib/ocaml/parser.sx | 23 +++++++++++++++++++++++ lib/ocaml/test.sh | 16 ++++++++++++++++ plans/ocaml-on-sx.md | 19 ++++++++++++------- 4 files changed, 78 insertions(+), 7 deletions(-) diff --git a/lib/ocaml/eval.sx b/lib/ocaml/eval.sx index b92da38f..fe38fed5 100644 --- a/lib/ocaml/eval.sx +++ b/lib/ocaml/eval.sx @@ -224,6 +224,33 @@ (ocaml-env-extend env (nth pat 1) val)) ((= tag "plit") (if (= (ocaml-eval-lit (nth pat 1)) val) env ocaml-match-fail)) + ((= tag "precord") + ;; (:precord (FIELDNAME PAT) ...) — val must be a dict with each + ;; named field; each pat must match the field's value. + (cond + ((not (dict? val)) ocaml-match-fail) + (else + (let ((fields (rest pat)) (env-cur env) (failed false)) + (begin + (define one-field + (fn (kv) + (let ((k (first kv)) (p (nth kv 1))) + (cond + ((not (has-key? val k)) + (set! failed true)) + (else + (let ((env2 (ocaml-match-pat p (get val k) env-cur))) + (cond + ((= env2 ocaml-match-fail) (set! failed true)) + (else (set! env-cur env2))))))))) + (define loop + (fn (xs) + (when (and (not failed) (not (= xs (list)))) + (begin (one-field (first xs)) (loop (rest xs)))))) + (loop fields) + (cond + (failed ocaml-match-fail) + (else env-cur))))))) ((= tag "pcon") ;; (:pcon NAME PATS...) — val must be (NAME VALS...) with same arity. (let ((name (nth pat 1)) (arg-pats (rest (rest pat)))) diff --git a/lib/ocaml/parser.sx b/lib/ocaml/parser.sx index 9dc08026..228059fb 100644 --- a/lib/ocaml/parser.sx +++ b/lib/ocaml/parser.sx @@ -250,6 +250,29 @@ (loop) (consume! "op" "]") (cons :plist items))))))) + ((and (= tt "op") (= tv "{")) + ;; Record pattern: { f1 = pat1; f2 = pat2; ... } + (begin + (advance-tok!) + (let ((fields (list))) + (begin + (define one + (fn () + (let ((fname (ocaml-tok-value (consume! "ident" nil)))) + (begin + (consume! "op" "=") + (let ((fp (parse-pattern))) + (append! fields (list fname fp))))))) + (one) + (define more + (fn () + (when (at-op? ";") + (begin (advance-tok!) + (when (not (at-op? "}")) + (begin (one) (more))))))) + (more) + (consume! "op" "}") + (cons :precord fields))))) (else (error (str diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 413f0645..3ee3766e 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -946,6 +946,16 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 2503) (eval "(ocaml-run \"match \`Pair (1, 2) with | \`Pair (a, b) -> a + b\")") +;; ── Record patterns ─────────────────────────────────────────── +(epoch 2600) +(eval "(ocaml-run \"match { x = 1; y = 2 } with | { x = a; y = b } -> a + b\")") +(epoch 2601) +(eval "(ocaml-run \"match { name = \\\"Bob\\\"; age = 30 } with | { name = n; age = a } -> a\")") +(epoch 2602) +(eval "(ocaml-run \"match { x = 1; y = 2 } with | { x = 1; y = y } -> y | _ -> 0\")") +(epoch 2603) +(eval "(ocaml-run \"match { x = 5; y = 2 } with | { x = 1; y = y } -> y | _ -> 0\")") + EPOCHS OUTPUT=$(timeout 180 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1496,6 +1506,12 @@ check 2501 "polyvar Some 42" '("Some" 42)' check 2502 "polyvar match" '1' check 2503 "polyvar Pair (a,b)" '3' +# ── Record patterns ──────────────────────────────────────────── +check 2600 "match record bind both" '3' +check 2601 "match record name+age" '30' +check 2602 "match record literal x=1" '2' +check 2603 "match record literal fail" '0' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 9e5ba3b5..ae745d26 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -134,13 +134,12 @@ SX CEK evaluator (both JS and OCaml hosts) `type`/`module`/`exception`/`open`/`include` decls, `try`/`with`, `function`, record literals/updates, field access, `and` mutually-recursive bindings.)_ -- [~] **Patterns:** constructor (nullary + with args, incl. flattened tuple - args `Pair (a, b)` → `(:pcon "Pair" PA PB)`), literal (int/string/char/ - bool/unit), variable, wildcard `_`, tuple, list cons `::`, list - literal, `as` binding (`pat as name`). Match clauses support `when` - guard via `(:case-when PAT GUARD BODY)`. _(Pending: record patterns, - or-pattern `P1 | P2` — ambiguous with clause separator without - lookahead.)_ +- [x] **Patterns:** constructor (nullary + with args, incl. flattened tuple + args), literal (int/string/char/bool/unit), variable, wildcard `_`, + tuple, list cons `::`, list literal, record `{ f = pat; … }`, + `as` binding. Match clauses support `when` guard via + `(:case-when PAT GUARD BODY)`. _(Pending: or-pattern `P1 | P2` — + ambiguous with clause separator without lookahead.)_ - [ ] OCaml is **not** indentation-sensitive — no layout algorithm needed. - [ ] Tests in `lib/ocaml/tests/parse.sx` — 50+ round-trip parse tests. @@ -377,6 +376,12 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-08 Phase 1+3 — record patterns `{ f = pat; … }` (+4 tests, + 386 total). Parser adds `(:precord (FIELD PAT) …)` alongside + the existing record-literal `{` handling. Eval matches against + dicts: required fields must be present and each pat must match the + value. Can mix with literals: `{ x = 1; y = y }` matches only when + x is 1. - 2026-05-08 Phase 5.1 — expr_eval.ml baseline (9/9 pass). A tiny arithmetic-expression evaluator using ADT (`type expr = Lit | Add | Mul | Neg`) + recursive eval + pattern match — exercises the full From 76ccbfbab6672be856d8589ffaf9e46545d2af4b Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 15:07:55 +0000 Subject: [PATCH 043/298] ocaml: refresh scoreboard (387/387 across 14 suites) --- lib/ocaml/scoreboard.json | 14 +++++++------- lib/ocaml/scoreboard.md | 12 ++++++------ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/lib/ocaml/scoreboard.json b/lib/ocaml/scoreboard.json index 5dece3f2..d3655d19 100644 --- a/lib/ocaml/scoreboard.json +++ b/lib/ocaml/scoreboard.json @@ -2,20 +2,20 @@ "suites": { "eval-core": {"pass": 50, "fail": 0}, "let-and": {"pass": 3, "fail": 0}, - "misc": {"pass": 62, "fail": 0}, - "parser": {"pass": 95, "fail": 0}, + "misc": {"pass": 71, "fail": 0}, + "parser": {"pass": 100, "fail": 0}, "phase1-params": {"pass": 2, "fail": 0}, "phase2-exn": {"pass": 8, "fail": 0}, "phase2-function": {"pass": 3, "fail": 0}, "phase2-loops": {"pass": 4, "fail": 0}, "phase2-refs": {"pass": 6, "fail": 0}, - "phase3-adt": {"pass": 24, "fail": 0}, + "phase3-adt": {"pass": 26, "fail": 0}, "phase4-modules": {"pass": 12, "fail": 0}, - "phase5-hm": {"pass": 34, "fail": 0}, - "phase6-stdlib": {"pass": 43, "fail": 0}, + "phase5-hm": {"pass": 37, "fail": 0}, + "phase6-stdlib": {"pass": 47, "fail": 0}, "tokenize": {"pass": 18, "fail": 0} }, - "total_pass": 364, + "total_pass": 387, "total_fail": 0, - "total": 364 + "total": 387 } diff --git a/lib/ocaml/scoreboard.md b/lib/ocaml/scoreboard.md index c32a550f..6e45565d 100644 --- a/lib/ocaml/scoreboard.md +++ b/lib/ocaml/scoreboard.md @@ -1,20 +1,20 @@ # OCaml-on-SX scoreboard -364 / 364 tests passing. +387 / 387 tests passing. | Suite | Pass | Fail | |---|---:|---:| | eval-core | 50 | 0 | | let-and | 3 | 0 | -| misc | 62 | 0 | -| parser | 95 | 0 | +| misc | 71 | 0 | +| parser | 100 | 0 | | phase1-params | 2 | 0 | | phase2-exn | 8 | 0 | | phase2-function | 3 | 0 | | phase2-loops | 4 | 0 | | phase2-refs | 6 | 0 | -| phase3-adt | 24 | 0 | +| phase3-adt | 26 | 0 | | phase4-modules | 12 | 0 | -| phase5-hm | 34 | 0 | -| phase6-stdlib | 43 | 0 | +| phase5-hm | 37 | 0 | +| phase6-stdlib | 47 | 0 | | tokenize | 18 | 0 | From ad252088c3aa9f45c3f3a26cf969b778f8c0b628 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 15:11:45 +0000 Subject: [PATCH 044/298] =?UTF-8?q?ocaml:=20phase=204=20module=20type=20S?= =?UTF-8?q?=20=3D=20sig=20=E2=80=A6=20end=20parser=20(+3=20tests,=20389=20?= =?UTF-8?q?total)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit module type S = sig DECLS end is parsed-and-discarded — sig..end balanced skipping in parse-decl-module-type. AST (:module-type-def NAME). Runtime no-op (signatures are type-level only). Allows real OCaml programs with module type decls to parse and run without stripping the sig blocks. --- lib/ocaml/eval.sx | 4 ++++ lib/ocaml/parser.sx | 43 +++++++++++++++++++++++++++++++++++++++++++ lib/ocaml/test.sh | 13 +++++++++++++ plans/ocaml-on-sx.md | 12 +++++++++--- 4 files changed, 69 insertions(+), 3 deletions(-) diff --git a/lib/ocaml/eval.sx b/lib/ocaml/eval.sx index fe38fed5..6f808939 100644 --- a/lib/ocaml/eval.sx +++ b/lib/ocaml/eval.sx @@ -704,6 +704,7 @@ (set! result (merge result (dict mname mod-val)))))))) ((= tag "type-def") nil) ((= tag "exception-def") nil) + ((= tag "module-type-def") nil) ((= tag "open") (let ((mod-val (ocaml-resolve-module-path (nth decl 1) env))) (cond @@ -884,6 +885,9 @@ ;; exception E [of T] — purely declarative; raise+match ;; already work on tagged ctor values. nil) + ((= tag "module-type-def") + ;; module type S = sig … end — no-op at runtime. + nil) ((or (= tag "open") (= tag "include")) ;; open M / include M — bring M's bindings into scope. (let ((mod-val (ocaml-resolve-module-path (nth decl 1) env))) diff --git a/lib/ocaml/parser.sx b/lib/ocaml/parser.sx index 228059fb..6befa7a3 100644 --- a/lib/ocaml/parser.sx +++ b/lib/ocaml/parser.sx @@ -1282,10 +1282,53 @@ (else (begin (advance-tok!) (skip)))))) (skip))))))) + ;; module type S = sig ... end + ;; Parsed-and-discarded (signatures are type-level only). Returns + ;; a (:module-type-def NAME) marker for the eval loop to ignore. + (define + parse-decl-module-type + (fn () + (advance-tok!) ;; "type" + (let ((name (ocaml-tok-value (consume! "ctor" nil)))) + (begin + (consume! "op" "=") + (cond + ((at-kw? "sig") + (begin + (advance-tok!) + (let ((depth 1)) + (begin + (define skip + (fn () + (cond + ((>= idx tok-len) nil) + ((= (ocaml-tok-type (peek-tok)) "eof") nil) + ((or (at-kw? "sig") (at-kw? "struct") (at-kw? "begin")) + (begin (set! depth (+ depth 1)) (advance-tok!) (skip))) + ((at-kw? "end") + (cond + ((= depth 1) nil) + (else + (begin (set! depth (- depth 1)) (advance-tok!) (skip))))) + (else (begin (advance-tok!) (skip)))))) + (skip) + (consume! "keyword" "end"))))) + (else + ;; module type S = AnotherSig — skip-to-boundary. + (skip-to-boundary!))) + (list :module-type-def name))))) + (define parse-decl-module (fn () (advance-tok!) + (cond + ((at-kw? "type") (parse-decl-module-type)) + (else (parse-decl-module-rest))))) + + (define + parse-decl-module-rest + (fn () (let ((name (ocaml-tok-value (consume! "ctor" nil))) (params (list))) (begin diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 3ee3766e..d7d14a13 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -956,6 +956,14 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 2603) (eval "(ocaml-run \"match { x = 5; y = 2 } with | { x = 1; y = y } -> y | _ -> 0\")") +;; ── module type S = sig … end ───────────────────────────────── +(epoch 2700) +(eval "(ocaml-parse-program \"module type S = sig val x : int val f : int -> int end\")") +(epoch 2701) +(eval "(ocaml-run-program \"module type S = sig val x : int end ;; module M = struct let x = 42 end ;; M.x\")") +(epoch 2702) +(eval "(ocaml-parse-program \"module type EMPTY = sig end\")") + EPOCHS OUTPUT=$(timeout 180 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1512,6 +1520,11 @@ check 2601 "match record name+age" '30' check 2602 "match record literal x=1" '2' check 2603 "match record literal fail" '0' +# ── module type S = sig … end ────────────────────────────────── +check 2700 "module type S parses" '("module-type-def" "S")' +check 2701 "module type then module" '42' +check 2702 "module type EMPTY" '("module-type-def" "EMPTY")' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index ae745d26..f9653a9b 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -191,9 +191,10 @@ SX CEK evaluator (both JS and OCaml hosts) - [x] `module M = struct let x = 1 let f y = x + y end` → SX dict `{"x" 1 "f" }`. -- [~] `module type S = sig val x : int val f : int -> int end` — signature - annotations are parsed-and-skipped (`skip-optional-sig`); typed - checking deferred to Phase 5. +- [x] `module type S = sig val x : int val f : int -> int end` parses + via `parse-decl-module-type`. Signature contents are skipped + (sig..end nesting tracked) — runtime no-op since types are + structural. AST: `(:module-type-def NAME)`. - [x] `module M : S = struct ... end` — coercive sealing (signature ignored). - [x] `functor (M : S) -> struct ... end` via shorthand `module F (M) = …`. - [x] `module F = Functor(Base)` — functor application; multi-param via @@ -376,6 +377,11 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-08 Phase 4 — `module type S = sig … end` parser (+3 tests, + 389 total). Signatures are parsed-and-discarded — sig..end balanced + skipping. AST: `(:module-type-def NAME)`. Runtime no-op (signatures + are type-level). Allows real OCaml code with module type decls to + parse and run without removing the sig blocks. - 2026-05-08 Phase 1+3 — record patterns `{ f = pat; … }` (+4 tests, 386 total). Parser adds `(:precord (FIELD PAT) …)` alongside the existing record-literal `{` handling. Eval matches against From 86343345dc4c3ce05d74aa1973f0f57442c34e74 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 15:22:34 +0000 Subject: [PATCH 045/298] ocaml: phase 1+3 or-patterns (P1 | P2 | ...) parens-only (+5 tests, 394 total) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Parser: when | follows a pattern inside parens, build (:por ALT1 ALT2 ...). Eval: try alternatives, succeed on first match. Top-level | remains the clause separator — parens-only avoids ambiguity without lookahead. Examples now work: match n with | (1 | 2 | 3) -> 100 | _ -> 0 match c with | (Red | Green) -> 1 | Blue -> 2 --- lib/ocaml/eval.sx | 16 ++++++++++++++++ lib/ocaml/parser.sx | 13 +++++++++++++ lib/ocaml/test.sh | 19 +++++++++++++++++++ plans/ocaml-on-sx.md | 11 ++++++++--- 4 files changed, 56 insertions(+), 3 deletions(-) diff --git a/lib/ocaml/eval.sx b/lib/ocaml/eval.sx index 6f808939..35115a33 100644 --- a/lib/ocaml/eval.sx +++ b/lib/ocaml/eval.sx @@ -259,6 +259,22 @@ (= (len (rest val)) (len arg-pats))) (ocaml-match-list arg-pats (rest val) env)) (else ocaml-match-fail)))) + ((= tag "por") + ;; (:por ALT1 ALT2 ...) — try each alternative; succeed on the + ;; first match. Note: for now, alternatives must bind the same + ;; set of variables (OCaml constraint) — we don't enforce this. + (let ((alts (rest pat)) (result ocaml-match-fail) (done false)) + (begin + (define try-alts + (fn (xs) + (when (and (not done) (not (= xs (list)))) + (let ((env2 (ocaml-match-pat (first xs) val env))) + (cond + ((not (= env2 ocaml-match-fail)) + (begin (set! result env2) (set! done true))) + (else (try-alts (rest xs)))))))) + (try-alts alts) + result))) ((= tag "pas") ;; (:pas INNER NAME) — match inner pattern, also bind NAME → val. (let ((inner (nth pat 1)) (alias (nth pat 2))) diff --git a/lib/ocaml/parser.sx b/lib/ocaml/parser.sx index 6befa7a3..cf71cad7 100644 --- a/lib/ocaml/parser.sx +++ b/lib/ocaml/parser.sx @@ -223,6 +223,19 @@ (loop) (consume! "op" ")") (cons :ptuple items)))) + ;; Parens-only or-pattern: (P1 | P2 | ...). + ((at-op? "|") + (let ((alts (list first))) + (begin + (define loop-or + (fn () + (when (at-op? "|") + (begin (advance-tok!) + (append! alts (parse-pattern)) + (loop-or))))) + (loop-or) + (consume! "op" ")") + (cons :por alts)))) (else (begin (consume! "op" ")") first)))))))) ((and (= tt "op") (= tv "[")) (begin diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index d7d14a13..b730db9a 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -964,6 +964,18 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 2702) (eval "(ocaml-parse-program \"module type EMPTY = sig end\")") +;; ── or-patterns (parens-only) ───────────────────────────────── +(epoch 2800) +(eval "(ocaml-run \"match 1 with | (1 | 2 | 3) -> 100 | _ -> 0\")") +(epoch 2801) +(eval "(ocaml-run \"match 2 with | (1 | 2 | 3) -> 100 | _ -> 0\")") +(epoch 2802) +(eval "(ocaml-run \"match 5 with | (1 | 2 | 3) -> 100 | _ -> 0\")") +(epoch 2803) +(eval "(ocaml-run \"match Red with | (Red | Green) -> 1 | Blue -> 2\")") +(epoch 2804) +(eval "(ocaml-run \"match Blue with | (Red | Green) -> 1 | Blue -> 2\")") + EPOCHS OUTPUT=$(timeout 180 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1525,6 +1537,13 @@ check 2700 "module type S parses" '("module-type-def" "S")' check 2701 "module type then module" '42' check 2702 "module type EMPTY" '("module-type-def" "EMPTY")' +# ── or-patterns (parens-only) ────────────────────────────────── +check 2800 "(1|2|3) match 1" '100' +check 2801 "(1|2|3) match 2" '100' +check 2802 "(1|2|3) match 5" '0' +check 2803 "(Red|Green) Red" '1' +check 2804 "(Red|Green) Blue" '2' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index f9653a9b..2cbb90cb 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -137,9 +137,9 @@ SX CEK evaluator (both JS and OCaml hosts) - [x] **Patterns:** constructor (nullary + with args, incl. flattened tuple args), literal (int/string/char/bool/unit), variable, wildcard `_`, tuple, list cons `::`, list literal, record `{ f = pat; … }`, - `as` binding. Match clauses support `when` guard via - `(:case-when PAT GUARD BODY)`. _(Pending: or-pattern `P1 | P2` — - ambiguous with clause separator without lookahead.)_ + `as` binding, or-pattern `(P1 | P2 | …)` (parens-only — top-level + `|` is the clause separator). Match clauses support `when` guard + via `(:case-when PAT GUARD BODY)`. - [ ] OCaml is **not** indentation-sensitive — no layout algorithm needed. - [ ] Tests in `lib/ocaml/tests/parse.sx` — 50+ round-trip parse tests. @@ -377,6 +377,11 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-08 Phase 1+3 — or-patterns `(P1 | P2 | ...)` parens-only + (+5 tests, 394 total). Parser: when `|` follows a pattern inside + parens, build `(:por ALT1 ALT2 ...)`. Eval: try alternatives, succeed + on the first match. Top-level `|` remains the clause separator (no + lookahead needed). Examples: `(1 | 2 | 3) -> ...`, `(Red | Green) -> 1`. - 2026-05-08 Phase 4 — `module type S = sig … end` parser (+3 tests, 389 total). Signatures are parsed-and-discarded — sig..end balanced skipping. AST: `(:module-type-def NAME)`. Runtime no-op (signatures From 9f05e24c52f1d07fccb5db703e3acae5673dc813 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 15:30:29 +0000 Subject: [PATCH 046/298] ocaml: phase 6 List.take/drop/filter_map/flat_map (+6 tests, 400 total) Common functional helpers written in OCaml. flat_map / concat_map share an implementation. 400-test milestone. --- lib/ocaml/runtime.sx | 29 +++++++++++++++++++++++++++++ lib/ocaml/test.sh | 22 ++++++++++++++++++++++ plans/ocaml-on-sx.md | 3 +++ 3 files changed, 54 insertions(+) diff --git a/lib/ocaml/runtime.sx b/lib/ocaml/runtime.sx index 0c9b3e0e..0ea5c882 100644 --- a/lib/ocaml/runtime.sx +++ b/lib/ocaml/runtime.sx @@ -198,6 +198,35 @@ match ys with | [] -> failwith \"List.map2: unequal\" | hy :: ty -> f hx hy :: map2 f tx ty + + let rec take n xs = + if n <= 0 then [] + else + match xs with + | [] -> [] + | h :: t -> h :: take (n - 1) t + + let rec drop n xs = + if n <= 0 then xs + else + match xs with + | [] -> [] + | _ :: t -> drop (n - 1) t + + let rec filter_map f xs = + match xs with + | [] -> [] + | h :: t -> + match f h with + | None -> filter_map f t + | Some v -> v :: filter_map f t + + let rec flat_map f xs = + match xs with + | [] -> [] + | h :: t -> append (f h) (flat_map f t) + + let concat_map = flat_map end ;; module Option = struct diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index b730db9a..4c39a5ae 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -976,6 +976,20 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 2804) (eval "(ocaml-run \"match Blue with | (Red | Green) -> 1 | Blue -> 2\")") +;; ── More List utilities (take/drop/filter_map/flat_map) ────── +(epoch 2900) +(eval "(ocaml-run \"List.take 3 [1;2;3;4;5]\")") +(epoch 2901) +(eval "(ocaml-run \"List.drop 2 [1;2;3;4;5]\")") +(epoch 2902) +(eval "(ocaml-run \"List.filter_map (fun x -> if x > 2 then Some (x * 10) else None) [1;2;3;4]\")") +(epoch 2903) +(eval "(ocaml-run \"List.flat_map (fun x -> [x; x]) [1;2;3]\")") +(epoch 2904) +(eval "(ocaml-run \"List.take 0 [1;2;3]\")") +(epoch 2905) +(eval "(ocaml-run \"List.take 100 [1;2;3]\")") + EPOCHS OUTPUT=$(timeout 180 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1544,6 +1558,14 @@ check 2802 "(1|2|3) match 5" '0' check 2803 "(Red|Green) Red" '1' check 2804 "(Red|Green) Blue" '2' +# ── List.take/drop/filter_map/flat_map ───────────────────────── +check 2900 "List.take 3" '(1 2 3)' +check 2901 "List.drop 2" '(3 4 5)' +check 2902 "List.filter_map" '(30 40)' +check 2903 "List.flat_map double" '(1 1 2 2 3 3)' +check 2904 "List.take 0" '()' +check 2905 "List.take overflow" '(1 2 3)' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 2cbb90cb..2d6d0a26 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -377,6 +377,9 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-08 Phase 6 — `List.take/drop/filter_map/flat_map/concat_map` + (+6 tests, 400 total). Common functional helpers, all written in + OCaml. **400-test milestone.** - 2026-05-08 Phase 1+3 — or-patterns `(P1 | P2 | ...)` parens-only (+5 tests, 394 total). Parser: when `|` follows a pattern inside parens, build `(:por ALT1 ALT2 ...)`. Eval: try alternatives, succeed From f40dfbbeb5a2b709c555a10a3ebf2266dc3dde9e Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 15:34:18 +0000 Subject: [PATCH 047/298] ocaml: phase 6 String extensions (+6 tests, 406 total) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ends_with, contains, trim, split_on_char, replace_all, index_of — wrap host SX primitives via new _string_* builtins. String module now substantively covers OCaml's Stdlib.String. --- lib/ocaml/eval.sx | 9 +++++++++ lib/ocaml/runtime.sx | 6 ++++++ lib/ocaml/test.sh | 22 ++++++++++++++++++++++ plans/ocaml-on-sx.md | 9 ++++++--- 4 files changed, 43 insertions(+), 3 deletions(-) diff --git a/lib/ocaml/eval.sx b/lib/ocaml/eval.sx index 35115a33..03a93208 100644 --- a/lib/ocaml/eval.sx +++ b/lib/ocaml/eval.sx @@ -58,6 +58,15 @@ (list "_string_upper" (fn (s) (upper s))) (list "_string_lower" (fn (s) (lower s))) (list "_string_starts_with" (fn (p) (fn (s) (starts-with? s p)))) + (list "_string_ends_with" (fn (p) (fn (s) (ends-with? s p)))) + (list "_string_contains" (fn (s) (fn (sub) (contains? s sub)))) + (list "_string_trim" (fn (s) (trim s))) + (list "_string_split_on_char" + (fn (sep) (fn (s) (split s sep)))) + (list "_string_replace" + (fn (s) (fn (a) (fn (b) (replace s a b))))) + (list "_string_index_of" + (fn (s) (fn (sub) (index-of s sub)))) (list "_int_of_string" (fn (s) (parse-number s))) (list "_string_of_int" (fn (i) (str i))) (list "_string_of_float" (fn (f) (str f))) diff --git a/lib/ocaml/runtime.sx b/lib/ocaml/runtime.sx index 0ea5c882..e1af957d 100644 --- a/lib/ocaml/runtime.sx +++ b/lib/ocaml/runtime.sx @@ -326,6 +326,12 @@ let uppercase_ascii s = _string_upper s let lowercase_ascii s = _string_lower s let starts_with prefix s = _string_starts_with prefix s + let ends_with suffix s = _string_ends_with suffix s + let contains s sub = _string_contains s sub + let trim s = _string_trim s + let split_on_char c s = _string_split_on_char c s + let replace_all s a b = _string_replace s a b + let index_of s sub = _string_index_of s sub end ;; module Char = struct diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 4c39a5ae..43e2da40 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -990,6 +990,20 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 2905) (eval "(ocaml-run \"List.take 100 [1;2;3]\")") +;; ── String extensions ────────────────────────────────────────── +(epoch 3000) +(eval "(ocaml-run \"String.ends_with \\\"lo\\\" \\\"hello\\\"\")") +(epoch 3001) +(eval "(ocaml-run \"String.contains \\\"hello\\\" \\\"ell\\\"\")") +(epoch 3002) +(eval "(ocaml-run \"String.trim \\\" hi \\\"\")") +(epoch 3003) +(eval "(ocaml-run \"String.split_on_char \\\" \\\" \\\"a b c\\\"\")") +(epoch 3004) +(eval "(ocaml-run \"String.replace_all \\\"hello\\\" \\\"l\\\" \\\"r\\\"\")") +(epoch 3005) +(eval "(ocaml-run \"String.index_of \\\"hello\\\" \\\"ll\\\"\")") + EPOCHS OUTPUT=$(timeout 180 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1566,6 +1580,14 @@ check 2903 "List.flat_map double" '(1 1 2 2 3 3)' check 2904 "List.take 0" '()' check 2905 "List.take overflow" '(1 2 3)' +# ── String extensions ────────────────────────────────────────── +check 3000 "String.ends_with" 'true' +check 3001 "String.contains" 'true' +check 3002 "String.trim" '"hi"' +check 3003 "String.split_on_char" '("a" "b" "c")' +check 3004 "String.replace_all" '"herro"' +check 3005 "String.index_of" '2' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 2d6d0a26..37288d34 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -262,9 +262,9 @@ SX CEK evaluator (both JS and OCaml hosts) - [~] `Hashtbl`: `create`, `add`, `find`, `find_opt`, `replace`, `mem`, `length`. Backed by a one-element list cell holding a SX dict; keys coerced to strings via `str` for polymorphic-key support. -- [~] `String`: `length`, `get`, `sub`, `concat`, `uppercase_ascii`, - `lowercase_ascii`, `starts_with`. _(Pending: split_on_char, trim, - contains, ends_with, index_opt, replace_all.)_ +- [x] `String`: `length`, `get`, `sub`, `concat`, `uppercase_ascii`, + `lowercase_ascii`, `starts_with`, `ends_with`, `contains`, `trim`, + `split_on_char`, `replace_all`, `index_of`. - [~] `Char`: `code`, `chr`, `lowercase_ascii`, `uppercase_ascii`. _(Pending: escaped.)_ - [~] `Int`: `to_string`, `of_string`, `abs`, `max`, `min`. @@ -377,6 +377,9 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-08 Phase 6 — String extensions: ends_with/contains/trim/ + split_on_char/replace_all/index_of (+6 tests, 406 total). Wraps host + primitives via `_string_*` builtins. - 2026-05-08 Phase 6 — `List.take/drop/filter_map/flat_map/concat_map` (+6 tests, 400 total). Common functional helpers, all written in OCaml. **400-test milestone.** From d61ee088c57dda8348195c849fc1bfbfee2bb068 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 15:35:28 +0000 Subject: [PATCH 048/298] ocaml: refresh scoreboard (407/407 across 14 suites) --- lib/ocaml/scoreboard.json | 10 +++++----- lib/ocaml/scoreboard.md | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/lib/ocaml/scoreboard.json b/lib/ocaml/scoreboard.json index d3655d19..4e1b55af 100644 --- a/lib/ocaml/scoreboard.json +++ b/lib/ocaml/scoreboard.json @@ -3,19 +3,19 @@ "eval-core": {"pass": 50, "fail": 0}, "let-and": {"pass": 3, "fail": 0}, "misc": {"pass": 71, "fail": 0}, - "parser": {"pass": 100, "fail": 0}, + "parser": {"pass": 106, "fail": 0}, "phase1-params": {"pass": 2, "fail": 0}, "phase2-exn": {"pass": 8, "fail": 0}, "phase2-function": {"pass": 3, "fail": 0}, "phase2-loops": {"pass": 4, "fail": 0}, "phase2-refs": {"pass": 6, "fail": 0}, "phase3-adt": {"pass": 26, "fail": 0}, - "phase4-modules": {"pass": 12, "fail": 0}, + "phase4-modules": {"pass": 14, "fail": 0}, "phase5-hm": {"pass": 37, "fail": 0}, - "phase6-stdlib": {"pass": 47, "fail": 0}, + "phase6-stdlib": {"pass": 59, "fail": 0}, "tokenize": {"pass": 18, "fail": 0} }, - "total_pass": 387, + "total_pass": 407, "total_fail": 0, - "total": 387 + "total": 407 } diff --git a/lib/ocaml/scoreboard.md b/lib/ocaml/scoreboard.md index 6e45565d..8188dda9 100644 --- a/lib/ocaml/scoreboard.md +++ b/lib/ocaml/scoreboard.md @@ -1,20 +1,20 @@ # OCaml-on-SX scoreboard -387 / 387 tests passing. +407 / 407 tests passing. | Suite | Pass | Fail | |---|---:|---:| | eval-core | 50 | 0 | | let-and | 3 | 0 | | misc | 71 | 0 | -| parser | 100 | 0 | +| parser | 106 | 0 | | phase1-params | 2 | 0 | | phase2-exn | 8 | 0 | | phase2-function | 3 | 0 | | phase2-loops | 4 | 0 | | phase2-refs | 6 | 0 | | phase3-adt | 26 | 0 | -| phase4-modules | 12 | 0 | +| phase4-modules | 14 | 0 | | phase5-hm | 37 | 0 | -| phase6-stdlib | 47 | 0 | +| phase6-stdlib | 59 | 0 | | tokenize | 18 | 0 | From 4bca2cacff0e66c015b4b7e4a08e1d3c9ddcc173 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 15:43:16 +0000 Subject: [PATCH 049/298] ocaml: phase 5 parse ctor arg types in user type-defs (+3 tests, 409 total) ocaml-hm-parse-type-src recognises primitive type names (int/bool/ string/float/unit), tyvars 'a, and simple parametric T list / T option. Replaces the previous int-by-default placeholder in ocaml-hm-register-type-def!. So 'type tag = TStr of string | TInt of int' correctly registers TStr : string -> tag and TInt : int -> tag. Pattern-match on tag gives proper field types in the body. Multi-arg / function types still fall back to a fresh tv. --- lib/ocaml/infer.sx | 45 ++++++++++++++++++++++++++++++++++---------- lib/ocaml/test.sh | 13 +++++++++++++ plans/ocaml-on-sx.md | 12 ++++++++++-- 3 files changed, 58 insertions(+), 12 deletions(-) diff --git a/lib/ocaml/infer.sx b/lib/ocaml/infer.sx index a37c5c59..84beebf2 100644 --- a/lib/ocaml/infer.sx +++ b/lib/ocaml/infer.sx @@ -412,11 +412,35 @@ (set-nth! ocaml-hm-ctors 0 (merge (nth ocaml-hm-ctors 0) (dict name scheme))))) -;; Process a :type-def AST. For each ctor, build its scheme: -;; nullary `A` → scheme [] (con NAME [param-tvs...]) -;; ctor `B of int` → scheme [] (int -> (con NAME [...])) -;; Argument types are ignored for now (they're raw source strings) — we -;; assume `int`. A future iteration parses arg types properly. +;; Parse a simple type source into an HM type. Handles primitive type +;; names, type variables `'a`, parametric `'a list`, `T1 * T2`, and +;; function `T1 -> T2`. Unknown tokens default to a fresh tv so the +;; result is at worst polymorphic, never wrong. +(define ocaml-hm-parse-type-src + (fn (src) + (let ((s (trim src))) + (cond + ((= s "int") (hm-int)) + ((= s "bool") (hm-bool)) + ((= s "string") (hm-string)) + ((= s "float") (ocaml-hm-float)) + ((= s "unit") (hm-con "Unit" (list))) + ((and (> (len s) 1) (= (nth s 0) "'")) + (hm-tv (slice s 1 (len s)))) + ;; "T list" / "T option" — split on space, treat last as ctor. + (else + (let ((parts (filter (fn (p) (not (= p ""))) (split s " ")))) + (cond + ((= (len parts) 2) + (let ((arg (ocaml-hm-parse-type-src (first parts))) + (head (nth parts 1))) + (hm-con head (list arg)))) + (else + ;; Unknown: emit a fresh tv so unification stays sound. + (hm-tv (str "_unknown")))))))))) + +;; Process a :type-def AST. For each ctor, build its scheme. Multi-arg +;; ctors are a list of types — we model that as a tuple arg. (define ocaml-hm-register-type-def! (fn (type-def) (let ((name (nth type-def 1)) @@ -434,11 +458,12 @@ (ocaml-hm-ctor-register! cname (hm-scheme params self-type))) (else - ;; Single-arg ctor with arg type `int` (placeholder). - ;; Multi-arg or other-typed ctors fall back to int. - (ocaml-hm-ctor-register! cname - (hm-scheme params - (hm-arrow (hm-int) self-type)))))))) + ;; ARG-SRCS is a list of source strings, often a + ;; single combined string `T1 * T2 * ...`. Parse. + (let ((arg-type (ocaml-hm-parse-type-src (first arg-srcs)))) + (ocaml-hm-ctor-register! cname + (hm-scheme params + (hm-arrow arg-type self-type))))))))) (for-each register-ctor ctors))))))) (set! ocaml-infer diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 43e2da40..edda595a 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1004,6 +1004,14 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 3005) (eval "(ocaml-run \"String.index_of \\\"hello\\\" \\\"ll\\\"\")") +;; ── HM with parsed ctor arg types ────────────────────────────── +(epoch 3100) +(eval "(ocaml-type-of-program \"type shape = Circle of int | Square of int;; let area s = match s with | Circle r -> r * r | Square s -> s * s;; area\")") +(epoch 3101) +(eval "(ocaml-type-of-program \"type tag = TStr of string | TInt of int;; TStr \\\"hi\\\"\")") +(epoch 3102) +(eval "(ocaml-type-of-program \"type t = A of bool | B of float;; A true\")") + EPOCHS OUTPUT=$(timeout 180 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1588,6 +1596,11 @@ check 3003 "String.split_on_char" '("a" "b" "c")' check 3004 "String.replace_all" '"herro"' check 3005 "String.index_of" '2' +# ── HM with parsed ctor arg types ────────────────────────────── +check 3100 "shape -> Int" '"shape -> Int"' +check 3101 "TStr 'hi' : tag" '"tag"' +check 3102 "A true : t" '"t"' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 37288d34..32e5853f 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -236,8 +236,10 @@ SX CEK evaluator (both JS and OCaml hosts) `ocaml-hm-register-type-def!` registers user types from `:type-def`. `ocaml-type-of-program` threads decls through the env, registering types and binding `let` schemes. `:con NAME` / `:pcon NAME …` - instantiate from the registry. _(Caveat: ctor arg types currently - default to `int` — proper type parsing pending.)_ + instantiate from the registry. Ctor arg types parsed via + `ocaml-hm-parse-type-src` — handles primitives (`int`/`bool`/ + `string`/`float`/`unit`), tyvars `'a`, simple parametric `T list`/ + `T option`. Multi-arg/complex types fall back to a fresh tv. - [~] Function types `T1 -> T2` work; tuples (`'a * 'b`) and lists (`'a list`) supported. Records pending. - [ ] Type signatures: `val f : int -> int` — verify against inferred type. @@ -377,6 +379,12 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-08 Phase 5 — parse simple type sources in user type-defs + (+3 tests, 409 total). `ocaml-hm-parse-type-src` recognises + primitive type names, tyvars `'a`, and `T list`/`T option`-style + parametric types. Replaces the old "default to Int" placeholder so + `type t = TStr of string` correctly registers `TStr : string -> t`. + Multi-arg / function types still fall back to a fresh tv. - 2026-05-08 Phase 6 — String extensions: ends_with/contains/trim/ split_on_char/replace_all/index_of (+6 tests, 406 total). Wraps host primitives via `_string_*` builtins. From cd93b1132806f4ff06b49a6f5a58ab5b8e37ded5 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 15:46:33 +0000 Subject: [PATCH 050/298] ocaml: phase 6 Sys module constants (+5 tests, 414 total) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit os_type="SX", word_size=64, max_array_length, max_string_length, executable_name="ocaml-on-sx", big_endian=false, unix=true, win32=false, cygwin=false. Constants-only for now — argv/getenv_opt/ command would need host platform integration. --- lib/ocaml/runtime.sx | 12 ++++++++++++ lib/ocaml/test.sh | 19 +++++++++++++++++++ plans/ocaml-on-sx.md | 8 ++++++++ 3 files changed, 39 insertions(+) diff --git a/lib/ocaml/runtime.sx b/lib/ocaml/runtime.sx index e1af957d..ce31974f 100644 --- a/lib/ocaml/runtime.sx +++ b/lib/ocaml/runtime.sx @@ -366,6 +366,18 @@ let printf fmt = print_string fmt end ;; + module Sys = struct + let os_type = \"SX\" + let word_size = 64 + let max_array_length = 4611686018427387903 + let max_string_length = 4611686018427387903 + let executable_name = \"ocaml-on-sx\" + let big_endian = false + let unix = true + let win32 = false + let cygwin = false + end ;; + module Hashtbl = struct let create n = _hashtbl_create n let add t k v = _hashtbl_add t k v diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index edda595a..a9eec931 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1012,6 +1012,18 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 3102) (eval "(ocaml-type-of-program \"type t = A of bool | B of float;; A true\")") +;; ── Sys module stubs ────────────────────────────────────────── +(epoch 3200) +(eval "(ocaml-run \"Sys.os_type\")") +(epoch 3201) +(eval "(ocaml-run \"Sys.word_size\")") +(epoch 3202) +(eval "(ocaml-run \"Sys.unix\")") +(epoch 3203) +(eval "(ocaml-run \"Sys.win32\")") +(epoch 3204) +(eval "(ocaml-run \"Sys.executable_name\")") + EPOCHS OUTPUT=$(timeout 180 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1601,6 +1613,13 @@ check 3100 "shape -> Int" '"shape -> Int"' check 3101 "TStr 'hi' : tag" '"tag"' check 3102 "A true : t" '"t"' +# ── Sys stubs ────────────────────────────────────────────────── +check 3200 "Sys.os_type" '"SX"' +check 3201 "Sys.word_size" '64' +check 3202 "Sys.unix" 'true' +check 3203 "Sys.win32" 'false' +check 3204 "Sys.executable_name" '"ocaml-on-sx"' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 32e5853f..bdec70c7 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -264,6 +264,10 @@ SX CEK evaluator (both JS and OCaml hosts) - [~] `Hashtbl`: `create`, `add`, `find`, `find_opt`, `replace`, `mem`, `length`. Backed by a one-element list cell holding a SX dict; keys coerced to strings via `str` for polymorphic-key support. +- [~] `Sys`: `os_type` (`"SX"`), `word_size`, `max_array_length`, + `max_string_length`, `executable_name`, `big_endian`, `unix`, + `win32`, `cygwin`. Constants only; `argv`/`getenv_opt`/`command` + pending (would need host platform integration). - [x] `String`: `length`, `get`, `sub`, `concat`, `uppercase_ascii`, `lowercase_ascii`, `starts_with`, `ends_with`, `contains`, `trim`, `split_on_char`, `replace_all`, `index_of`. @@ -379,6 +383,10 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-08 Phase 6 — `Sys` module constants (+5 tests, 414 total). + os_type, word_size, max_array_length, max_string_length, + executable_name, big_endian, unix, win32, cygwin. Constants-only + for now; `argv`/`getenv_opt`/`command` need host platform integration. - 2026-05-08 Phase 5 — parse simple type sources in user type-defs (+3 tests, 409 total). `ocaml-hm-parse-type-src` recognises primitive type names, tyvars `'a`, and `T list`/`T option`-style From 85867e329bf05b9126a15fc6de69f6098fc32eab Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 15:50:22 +0000 Subject: [PATCH 051/298] ocaml: phase 6 Map.Make / Set.Make functors (+4 tests, 418 total) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both written in OCaml inside lib/ocaml/runtime.sx: module Map = struct module Make (Ord) = struct let empty = [] let add k v m = ... (* sorted insert via Ord.compare *) let find_opt / find / mem / remove / bindings / cardinal end end module Set = struct module Make (Ord) = struct let empty = [] let mem / add / remove / elements / cardinal end end Sorted association list / sorted list backing — linear ops but correct. Strong substrate-validation: Map.Make is a non-trivial functor implemented entirely on top of the OCaml-on-SX evaluator. --- lib/ocaml/runtime.sx | 84 ++++++++++++++++++++++++++++++++++++++++++++ lib/ocaml/test.sh | 16 +++++++++ plans/ocaml-on-sx.md | 15 ++++++-- 3 files changed, 113 insertions(+), 2 deletions(-) diff --git a/lib/ocaml/runtime.sx b/lib/ocaml/runtime.sx index ce31974f..5afd020f 100644 --- a/lib/ocaml/runtime.sx +++ b/lib/ocaml/runtime.sx @@ -389,6 +389,90 @@ | Some v -> v let mem t k = _hashtbl_mem t k let length t = _hashtbl_length t + end ;; + + module Map = struct + module Make (Ord) = struct + let empty = [] + + let rec add k v m = + match m with + | [] -> [(k, v)] + | (k2, v2) :: rest -> + begin + let c = Ord.compare k k2 in + if c = 0 then (k, v) :: rest + else if c < 0 then (k, v) :: m + else (k2, v2) :: add k v rest + end + + let rec find_opt k m = + match m with + | [] -> None + | (k2, v) :: rest -> + if Ord.compare k k2 = 0 then Some v + else find_opt k rest + + let find k m = + match find_opt k m with + | None -> failwith \"Map.find: not found\" + | Some v -> v + + let rec mem k m = + match m with + | [] -> false + | (k2, _) :: rest -> if Ord.compare k k2 = 0 then true else mem k rest + + let rec remove k m = + match m with + | [] -> [] + | (k2, v) :: rest -> + if Ord.compare k k2 = 0 then rest else (k2, v) :: remove k rest + + let rec bindings m = m + + let rec cardinal m = + match m with + | [] -> 0 + | _ :: t -> 1 + cardinal t + end + end ;; + + module Set = struct + module Make (Ord) = struct + let empty = [] + + let rec mem x s = + match s with + | [] -> false + | h :: t -> + let c = Ord.compare x h in + if c = 0 then true + else if c < 0 then false + else mem x t + + let rec add x s = + match s with + | [] -> [x] + | h :: t -> + let c = Ord.compare x h in + if c = 0 then s + else if c < 0 then x :: s + else h :: add x t + + let rec remove x s = + match s with + | [] -> [] + | h :: t -> + if Ord.compare x h = 0 then t else h :: remove x t + + let rec elements s = s + + let rec cardinal s = + match s with + | [] -> 0 + | _ :: t -> 1 + cardinal t + end end") (define ocaml-stdlib-loaded false) diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index a9eec931..0c2fdd5d 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1024,6 +1024,16 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 3204) (eval "(ocaml-run \"Sys.executable_name\")") +;; ── Map.Make / Set.Make functors ────────────────────────────── +(epoch 3300) +(eval "(ocaml-run-program \"module IntOrd = struct let compare a b = compare a b end ;; module IntMap = Map.Make(IntOrd) ;; let m = IntMap.add 1 \\\"a\\\" IntMap.empty ;; let m = IntMap.add 2 \\\"b\\\" m ;; IntMap.find 1 m\")") +(epoch 3301) +(eval "(ocaml-run-program \"module IntOrd = struct let compare a b = compare a b end ;; module IntMap = Map.Make(IntOrd) ;; IntMap.cardinal (IntMap.add 1 \\\"a\\\" (IntMap.add 2 \\\"b\\\" IntMap.empty))\")") +(epoch 3302) +(eval "(ocaml-run-program \"module IntOrd = struct let compare a b = compare a b end ;; module IntSet = Set.Make(IntOrd) ;; IntSet.elements (IntSet.add 3 (IntSet.add 1 (IntSet.add 2 IntSet.empty)))\")") +(epoch 3303) +(eval "(ocaml-run-program \"module IntOrd = struct let compare a b = compare a b end ;; module IntSet = Set.Make(IntOrd) ;; IntSet.mem 2 (IntSet.add 3 (IntSet.add 1 (IntSet.add 2 IntSet.empty)))\")") + EPOCHS OUTPUT=$(timeout 180 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1620,6 +1630,12 @@ check 3202 "Sys.unix" 'true' check 3203 "Sys.win32" 'false' check 3204 "Sys.executable_name" '"ocaml-on-sx"' +# ── Map.Make / Set.Make ──────────────────────────────────────── +check 3300 "Map.find via functor" '"a"' +check 3301 "Map.cardinal" '2' +check 3302 "Set.elements sorted" '(1 2 3)' +check 3303 "Set.mem" 'true' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index bdec70c7..d009b9d7 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -286,8 +286,11 @@ SX CEK evaluator (both JS and OCaml hosts) - [ ] `Int`/`Float`: arithmetic, `to_string`, `of_string_opt`, `min_int`, `max_int`. - [ ] `Hashtbl`: `create`, `add`, `replace`, `find`, `find_opt`, `remove`, `mem`, `iter`, `fold`, `length` — backed by SX mutable dict. -- [ ] `Map.Make` functor — balanced BST backed by SX sorted dict. -- [ ] `Set.Make` functor. +- [x] `Map.Make` functor — sorted association list backed + (insert/find/remove/mem/cardinal/bindings); not a balanced tree + but linear with parametric `Ord` ordering. +- [x] `Set.Make` functor — sorted list backed + (add/mem/remove/elements/cardinal). - [ ] `Printf`: `sprintf`, `printf`, `eprintf` — format strings via `(format ...)`. - [ ] `Sys`: `argv`, `getenv_opt`, `getcwd` — via `perform` IO. - [ ] Scoreboard runner: `lib/ocaml/conformance.sh` + `scoreboard.json`. @@ -383,6 +386,14 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-08 Phase 6 — `Map.Make` / `Set.Make` functors written in + OCaml (+4 tests, 418 total). Sorted association list / sorted list + backed (linear ops, but correct). Both take an `Ord` module supplying + `compare`. Tested: `module IntMap = Map.Make(IntOrd) ;; IntMap.find + …` and `IntSet.elements (IntSet.add 3 (IntSet.add 1 …))` returning + `[1; 2; 3]`. Strong substrate-validation for the functor system — + Map.Make is a non-trivial functor implemented entirely on top of the + OCaml-on-SX evaluator. - 2026-05-08 Phase 6 — `Sys` module constants (+5 tests, 414 total). os_type, word_size, max_array_length, max_string_length, executable_name, big_endian, unix, win32, cygwin. Constants-only From b297c83b1d2a9fcb719af5cb7a6dba59df7f288f Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 15:51:36 +0000 Subject: [PATCH 052/298] ocaml: refresh scoreboard (419/419 across 14 suites) --- lib/ocaml/scoreboard.json | 10 +++++----- lib/ocaml/scoreboard.md | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/lib/ocaml/scoreboard.json b/lib/ocaml/scoreboard.json index 4e1b55af..13f52239 100644 --- a/lib/ocaml/scoreboard.json +++ b/lib/ocaml/scoreboard.json @@ -2,20 +2,20 @@ "suites": { "eval-core": {"pass": 50, "fail": 0}, "let-and": {"pass": 3, "fail": 0}, - "misc": {"pass": 71, "fail": 0}, + "misc": {"pass": 81, "fail": 0}, "parser": {"pass": 106, "fail": 0}, "phase1-params": {"pass": 2, "fail": 0}, "phase2-exn": {"pass": 8, "fail": 0}, "phase2-function": {"pass": 3, "fail": 0}, "phase2-loops": {"pass": 4, "fail": 0}, "phase2-refs": {"pass": 6, "fail": 0}, - "phase3-adt": {"pass": 26, "fail": 0}, + "phase3-adt": {"pass": 27, "fail": 0}, "phase4-modules": {"pass": 14, "fail": 0}, - "phase5-hm": {"pass": 37, "fail": 0}, + "phase5-hm": {"pass": 38, "fail": 0}, "phase6-stdlib": {"pass": 59, "fail": 0}, "tokenize": {"pass": 18, "fail": 0} }, - "total_pass": 407, + "total_pass": 419, "total_fail": 0, - "total": 407 + "total": 419 } diff --git a/lib/ocaml/scoreboard.md b/lib/ocaml/scoreboard.md index 8188dda9..de426c35 100644 --- a/lib/ocaml/scoreboard.md +++ b/lib/ocaml/scoreboard.md @@ -1,20 +1,20 @@ # OCaml-on-SX scoreboard -407 / 407 tests passing. +419 / 419 tests passing. | Suite | Pass | Fail | |---|---:|---:| | eval-core | 50 | 0 | | let-and | 3 | 0 | -| misc | 71 | 0 | +| misc | 81 | 0 | | parser | 106 | 0 | | phase1-params | 2 | 0 | | phase2-exn | 8 | 0 | | phase2-function | 3 | 0 | | phase2-loops | 4 | 0 | | phase2-refs | 6 | 0 | -| phase3-adt | 26 | 0 | +| phase3-adt | 27 | 0 | | phase4-modules | 14 | 0 | -| phase5-hm | 37 | 0 | +| phase5-hm | 38 | 0 | | phase6-stdlib | 59 | 0 | | tokenize | 18 | 0 | From 404c908a9a595def05c011b4c352c0b45cc8a13e Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 16:02:45 +0000 Subject: [PATCH 053/298] ocaml: phase 6 Map/Set extensions iter/fold/filter/union/inter (+4 tests, 422 total) Map.Make: iter, fold, map, filter, is_empty added. Set.Make: iter, fold, filter, is_empty, union, inter added. All written in OCaml inside the existing functor bodies. Tested: IntMap.fold (fun k v acc -> acc + v) m 0 = 30 IntSet.elements (IntSet.union {1,2} {2,3}) = [1; 2; 3] IntSet.elements (IntSet.inter {1,2,3} {2,3,4}) = [2; 3] --- lib/ocaml/runtime.sx | 56 ++++++++++++++++++++++++++++++++++++++++++++ lib/ocaml/test.sh | 16 +++++++++++++ plans/ocaml-on-sx.md | 3 +++ 3 files changed, 75 insertions(+) diff --git a/lib/ocaml/runtime.sx b/lib/ocaml/runtime.sx index 5afd020f..97d1835a 100644 --- a/lib/ocaml/runtime.sx +++ b/lib/ocaml/runtime.sx @@ -435,6 +435,32 @@ match m with | [] -> 0 | _ :: t -> 1 + cardinal t + + let rec iter f m = + match m with + | [] -> () + | (k, v) :: t -> f k v; iter f t + + let rec fold f m acc = + match m with + | [] -> acc + | (k, v) :: t -> fold f t (f k v acc) + + let rec map f m = + match m with + | [] -> [] + | (k, v) :: t -> (k, f v) :: map f t + + let rec filter p m = + match m with + | [] -> [] + | (k, v) :: t -> + if p k v then (k, v) :: filter p t else filter p t + + let rec is_empty m = + match m with + | [] -> true + | _ -> false end end ;; @@ -472,6 +498,36 @@ match s with | [] -> 0 | _ :: t -> 1 + cardinal t + + let rec iter f s = + match s with + | [] -> () + | h :: t -> f h; iter f t + + let rec fold f s acc = + match s with + | [] -> acc + | h :: t -> fold f t (f h acc) + + let rec filter p s = + match s with + | [] -> [] + | h :: t -> if p h then h :: filter p t else filter p t + + let rec is_empty s = + match s with + | [] -> true + | _ -> false + + let rec union a b = + match b with + | [] -> a + | h :: t -> union (add h a) t + + let rec inter a b = + match a with + | [] -> [] + | h :: t -> if mem h b then h :: inter t b else inter t b end end") diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 0c2fdd5d..0a0e75ba 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1034,6 +1034,16 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 3303) (eval "(ocaml-run-program \"module IntOrd = struct let compare a b = compare a b end ;; module IntSet = Set.Make(IntOrd) ;; IntSet.mem 2 (IntSet.add 3 (IntSet.add 1 (IntSet.add 2 IntSet.empty)))\")") +;; ── Map/Set fold/iter/filter/union/inter ────────────────────── +(epoch 3400) +(eval "(ocaml-run-program \"module IntOrd = struct let compare a b = compare a b end ;; module IntMap = Map.Make(IntOrd) ;; let m = IntMap.add 1 10 (IntMap.add 2 20 IntMap.empty) ;; IntMap.fold (fun k v acc -> acc + v) m 0\")") +(epoch 3401) +(eval "(ocaml-run-program \"module IntOrd = struct let compare a b = compare a b end ;; module IntMap = Map.Make(IntOrd) ;; let m = IntMap.add 1 10 IntMap.empty ;; IntMap.is_empty m\")") +(epoch 3402) +(eval "(ocaml-run-program \"module IntOrd = struct let compare a b = compare a b end ;; module IntSet = Set.Make(IntOrd) ;; let a = IntSet.add 1 (IntSet.add 2 IntSet.empty) ;; let b = IntSet.add 2 (IntSet.add 3 IntSet.empty) ;; IntSet.elements (IntSet.union a b)\")") +(epoch 3403) +(eval "(ocaml-run-program \"module IntOrd = struct let compare a b = compare a b end ;; module IntSet = Set.Make(IntOrd) ;; let a = IntSet.add 1 (IntSet.add 2 (IntSet.add 3 IntSet.empty)) ;; let b = IntSet.add 2 (IntSet.add 3 (IntSet.add 4 IntSet.empty)) ;; IntSet.elements (IntSet.inter a b)\")") + EPOCHS OUTPUT=$(timeout 180 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1636,6 +1646,12 @@ check 3301 "Map.cardinal" '2' check 3302 "Set.elements sorted" '(1 2 3)' check 3303 "Set.mem" 'true' +# ── Map/Set fold/iter/filter/union/inter ─────────────────────── +check 3400 "Map.fold sum" '30' +check 3401 "Map.is_empty false" 'false' +check 3402 "Set.union" '(1 2 3)' +check 3403 "Set.inter" '(2 3)' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index d009b9d7..bc10a8fb 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -386,6 +386,9 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-08 Phase 6 — Map/Set extensions: iter/fold/map/filter/ + is_empty + Set.union/inter (+4 tests, 422 total). Functor + bodies grow naturally — all in OCaml syntax. - 2026-05-08 Phase 6 — `Map.Make` / `Set.Make` functors written in OCaml (+4 tests, 418 total). Sorted association list / sorted list backed (linear ops, but correct). Both take an `Ord` module supplying From dbe3c6c203b935e1fe355219c9ff4a3e469200bc Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 16:11:03 +0000 Subject: [PATCH 054/298] ocaml: phase 5.1 word_count.ml baseline (10/10 pass) Uses Map.Make(StrOrd) + List.fold_left to count word frequencies; exercises the full functor pipeline with a real-world idiom: let inc_count m word = match StrMap.find_opt word m with | None -> StrMap.add word 1 m | Some n -> StrMap.add word (n + 1) m let count words = List.fold_left inc_count StrMap.empty words 10/10 baseline programs pass. --- lib/ocaml/baseline/expected.json | 3 ++- lib/ocaml/baseline/word_count.ml | 14 ++++++++++++++ plans/ocaml-on-sx.md | 3 +++ 3 files changed, 19 insertions(+), 1 deletion(-) create mode 100644 lib/ocaml/baseline/word_count.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 4d328b0d..3452637e 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -7,5 +7,6 @@ "module_use.ml": 3, "option_match.ml": 5, "quicksort.ml": 44, - "sum_squares.ml": 385 + "sum_squares.ml": 385, + "word_count.ml": 3 } diff --git a/lib/ocaml/baseline/word_count.ml b/lib/ocaml/baseline/word_count.ml new file mode 100644 index 00000000..d455225e --- /dev/null +++ b/lib/ocaml/baseline/word_count.ml @@ -0,0 +1,14 @@ +(* Baseline: word-frequency map over a list using Map.Make + List.fold_left *) +module StrOrd = struct let compare a b = compare a b end ;; +module StrMap = Map.Make(StrOrd) ;; + +let inc_count m word = + match StrMap.find_opt word m with + | None -> StrMap.add word 1 m + | Some n -> StrMap.add word (n + 1) m +;; + +let count words = List.fold_left inc_count StrMap.empty words ;; + +let m = count ["the"; "fox"; "the"; "dog"; "the"; "fox"] ;; +StrMap.find "the" m diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index bc10a8fb..b44bcece 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -386,6 +386,9 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-08 Phase 5.1 — word_count.ml baseline (10/10 pass). Uses + Map.Make(StrOrd) + List.fold_left to count word frequencies; tests + the full functor pipeline with a real OCaml idiom. - 2026-05-08 Phase 6 — Map/Set extensions: iter/fold/map/filter/ is_empty + Set.union/inter (+4 tests, 422 total). Functor bodies grow naturally — all in OCaml syntax. From 2f271fa6a6d10af6836e5afba14eae5d88879c03 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 16:16:52 +0000 Subject: [PATCH 055/298] ocaml: phase 1+6 Buffer + parser !x in app args (+3 tests, 425 total) Parser fix: at-app-start? and parse-app's loop recognise prefix ! as a deref of the next app arg. So 'List.rev !b' parses as '(:app List.rev (:deref b))' instead of stalling at !. Buffer module backed by a ref holding string list: create _ = ref [] add_string b s = b := s :: !b contents b = String.concat "" (List.rev !b) add_char/length/clear/reset --- lib/ocaml/parser.sx | 11 ++++++++--- lib/ocaml/runtime.sx | 10 ++++++++++ lib/ocaml/test.sh | 13 +++++++++++++ plans/ocaml-on-sx.md | 8 ++++++++ 4 files changed, 39 insertions(+), 3 deletions(-) diff --git a/lib/ocaml/parser.sx b/lib/ocaml/parser.sx index cf71cad7..0ff699d7 100644 --- a/lib/ocaml/parser.sx +++ b/lib/ocaml/parser.sx @@ -167,7 +167,7 @@ ((= tt "ctor") true) ((and (= tt "keyword") (or (= tv "true") (= tv "false"))) true) - ((and (= tt "op") (or (= tv "(") (= tv "[") (= tv "{"))) true) + ((and (= tt "op") (or (= tv "(") (= tv "[") (= tv "{") (= tv "!"))) true) (else false))))) (set! parse-pattern-atom @@ -532,7 +532,7 @@ ((= tt "ctor") true) ((and (= tt "keyword") (or (= tv "true") (= tv "false") (= tv "begin"))) true) - ((and (= tt "op") (or (= tv "(") (= tv "[") (= tv "{"))) true) + ((and (= tt "op") (or (= tv "(") (= tv "[") (= tv "{") (= tv "!"))) true) (else false))))) (define parse-atom-postfix (fn () @@ -568,7 +568,12 @@ (when (at-app-start?) (let - ((arg (parse-atom-postfix))) + ((arg + (cond + ((at-op? "!") + (begin (advance-tok!) + (list :deref (parse-atom-postfix)))) + (else (parse-atom-postfix))))) (begin (set! head (list :app head arg)) (loop)))))) (loop) head)))) diff --git a/lib/ocaml/runtime.sx b/lib/ocaml/runtime.sx index 97d1835a..142eb3f2 100644 --- a/lib/ocaml/runtime.sx +++ b/lib/ocaml/runtime.sx @@ -366,6 +366,16 @@ let printf fmt = print_string fmt end ;; + module Buffer = struct + let create _ = ref [] + let add_string b s = b := s :: !b + let add_char b c = b := c :: !b + let contents b = String.concat \"\" (List.rev !b) + let length b = String.length (String.concat \"\" (List.rev !b)) + let clear b = b := [] + let reset = clear + end ;; + module Sys = struct let os_type = \"SX\" let word_size = 64 diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 0a0e75ba..23206812 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1044,6 +1044,14 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 3403) (eval "(ocaml-run-program \"module IntOrd = struct let compare a b = compare a b end ;; module IntSet = Set.Make(IntOrd) ;; let a = IntSet.add 1 (IntSet.add 2 (IntSet.add 3 IntSet.empty)) ;; let b = IntSet.add 2 (IntSet.add 3 (IntSet.add 4 IntSet.empty)) ;; IntSet.elements (IntSet.inter a b)\")") +;; ── Buffer module ────────────────────────────────────────────── +(epoch 3500) +(eval "(ocaml-run-program \"let b = Buffer.create 16 ;; Buffer.add_string b \\\"Hello\\\" ;; Buffer.add_string b \\\", \\\" ;; Buffer.add_string b \\\"World\\\" ;; Buffer.contents b\")") +(epoch 3501) +(eval "(ocaml-run-program \"let b = Buffer.create 16 ;; Buffer.add_string b \\\"abc\\\" ;; Buffer.length b\")") +(epoch 3502) +(eval "(ocaml-run-program \"let b = Buffer.create 16 ;; Buffer.add_string b \\\"x\\\" ;; Buffer.clear b ;; Buffer.contents b\")") + EPOCHS OUTPUT=$(timeout 180 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1652,6 +1660,11 @@ check 3401 "Map.is_empty false" 'false' check 3402 "Set.union" '(1 2 3)' check 3403 "Set.inter" '(2 3)' +# ── Buffer module ────────────────────────────────────────────── +check 3500 "Buffer concat 'Hello, World'" '"Hello, World"' +check 3501 "Buffer.length 3" '3' +check 3502 "Buffer.clear empties" '""' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index b44bcece..c494ee9c 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -264,6 +264,9 @@ SX CEK evaluator (both JS and OCaml hosts) - [~] `Hashtbl`: `create`, `add`, `find`, `find_opt`, `replace`, `mem`, `length`. Backed by a one-element list cell holding a SX dict; keys coerced to strings via `str` for polymorphic-key support. +- [~] `Buffer`: `create`, `add_string`, `add_char`, `contents`, `length`, + `clear`, `reset`. Backed by a ref holding a list of strings; reverse + + `String.concat` on `contents`. Mostly-OCaml impl. - [~] `Sys`: `os_type` (`"SX"`), `word_size`, `max_array_length`, `max_string_length`, `executable_name`, `big_endian`, `unix`, `win32`, `cygwin`. Constants only; `argv`/`getenv_opt`/`command` @@ -386,6 +389,11 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-08 Phase 1+6 — Buffer module + parser fix for `f !x` (+3 + tests, 425 total). Parser: at-app-start? and parse-app's loop now + recognise `!` as the prefix-deref of an application argument, so + `String.concat "" (List.rev !b)` parses as `(... (deref b))`. Buffer + uses a ref holding a string list; contents reverses and concats. - 2026-05-08 Phase 5.1 — word_count.ml baseline (10/10 pass). Uses Map.Make(StrOrd) + List.fold_left to count word frequencies; tests the full functor pipeline with a real OCaml idiom. From ecdd90345ed23163c069b51fd914c53f8794a102 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 16:18:21 +0000 Subject: [PATCH 056/298] ocaml: refresh scoreboard (426/426 across 14 suites) --- lib/ocaml/scoreboard.json | 8 ++++---- lib/ocaml/scoreboard.md | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/lib/ocaml/scoreboard.json b/lib/ocaml/scoreboard.json index 13f52239..909a4877 100644 --- a/lib/ocaml/scoreboard.json +++ b/lib/ocaml/scoreboard.json @@ -1,8 +1,8 @@ { "suites": { - "eval-core": {"pass": 50, "fail": 0}, + "eval-core": {"pass": 51, "fail": 0}, "let-and": {"pass": 3, "fail": 0}, - "misc": {"pass": 81, "fail": 0}, + "misc": {"pass": 87, "fail": 0}, "parser": {"pass": 106, "fail": 0}, "phase1-params": {"pass": 2, "fail": 0}, "phase2-exn": {"pass": 8, "fail": 0}, @@ -15,7 +15,7 @@ "phase6-stdlib": {"pass": 59, "fail": 0}, "tokenize": {"pass": 18, "fail": 0} }, - "total_pass": 419, + "total_pass": 426, "total_fail": 0, - "total": 419 + "total": 426 } diff --git a/lib/ocaml/scoreboard.md b/lib/ocaml/scoreboard.md index de426c35..551faa72 100644 --- a/lib/ocaml/scoreboard.md +++ b/lib/ocaml/scoreboard.md @@ -1,12 +1,12 @@ # OCaml-on-SX scoreboard -419 / 419 tests passing. +426 / 426 tests passing. | Suite | Pass | Fail | |---|---:|---:| -| eval-core | 50 | 0 | +| eval-core | 51 | 0 | | let-and | 3 | 0 | -| misc | 81 | 0 | +| misc | 87 | 0 | | parser | 106 | 0 | | phase1-params | 2 | 0 | | phase2-exn | 8 | 0 | From ffa74399fdc9f49bd87bb4914a13fded12f90bfd Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 16:53:44 +0000 Subject: [PATCH 057/298] ocaml: phase 5.1 calc.ml baseline (11/11 pass) + inline let-rec-and parser fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Recursive-descent calculator parses '(1 + 2) * 3 + 4' = 13. Two parser bugs fixed: 1. parse-let now handles inline 'let rec a () = ... and b () = ... in body' via new (:let-rec-mut BINDINGS BODY) and (:let-mut BINDINGS BODY) AST shapes; eval handles both. 2. has-matching-in? lookahead no longer stops at 'and' — 'and' is internal to let-rec, not a decl boundary. Without this fix, the inner 'let rec a () = ... and b () = ...' inside a let-decl rhs would have been treated as the start of a new top-level decl. Baseline exercises mutually-recursive functions, while-loops, ref-cell imperative parsing, and ADT-based AST construction. --- lib/ocaml/baseline/calc.ml | 76 ++++++++++++++++++++++++++++++++ lib/ocaml/baseline/expected.json | 1 + lib/ocaml/eval.sx | 51 +++++++++++++++++++++ lib/ocaml/parser.sx | 62 ++++++++++++++------------ plans/ocaml-on-sx.md | 8 ++++ 5 files changed, 170 insertions(+), 28 deletions(-) create mode 100644 lib/ocaml/baseline/calc.ml diff --git a/lib/ocaml/baseline/calc.ml b/lib/ocaml/baseline/calc.ml new file mode 100644 index 00000000..375caa1f --- /dev/null +++ b/lib/ocaml/baseline/calc.ml @@ -0,0 +1,76 @@ +(* Baseline: recursive-descent calculator for "+", "*", parens, ints. *) +type expr = + | Lit of int + | Add of expr * expr + | Mul of expr * expr +;; + +let parse_input src = + let pos = ref 0 in + let peek () = if !pos < String.length src then String.get src !pos else "" in + let advance () = pos := !pos + 1 in + let skip_ws () = + while !pos < String.length src && peek () = " " do advance () done + in + + let rec parse_atom () = + skip_ws () ; + if peek () = "(" then begin + advance () ; + let e = parse_expr () in + skip_ws () ; + advance () ; (* consume ')' *) + e + end + else + let start = !pos in + let rec digits () = + if !pos < String.length src then + let c = peek () in + if c >= "0" && c <= "9" then begin advance () ; digits () end + else () + in + digits () ; + let n = Int.of_string (String.sub src start (!pos - start)) in + Lit n + + and parse_term () = + skip_ws () ; + let lhs = ref (parse_atom ()) in + let rec loop () = + skip_ws () ; + if peek () = "*" then begin + advance () ; + lhs := Mul (!lhs, parse_atom ()) ; + loop () + end + in + loop () ; + !lhs + + and parse_expr () = + skip_ws () ; + let lhs = ref (parse_term ()) in + let rec loop () = + skip_ws () ; + if peek () = "+" then begin + advance () ; + lhs := Add (!lhs, parse_term ()) ; + loop () + end + in + loop () ; + !lhs + in + parse_expr () +;; + +let rec eval e = + match e with + | Lit n -> n + | Add (a, b) -> eval a + eval b + | Mul (a, b) -> eval a * eval b +;; + +(* (1 + 2) * 3 + 4 = 9 + 4 = 13 *) +eval (parse_input "(1 + 2) * 3 + 4") diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 3452637e..98764167 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -1,4 +1,5 @@ { + "calc.ml": 13, "closures.ml": 315, "exception_handle.ml": 4, "expr_eval.ml": 16, diff --git a/lib/ocaml/eval.sx b/lib/ocaml/eval.sx index 03a93208..d91e522b 100644 --- a/lib/ocaml/eval.sx +++ b/lib/ocaml/eval.sx @@ -619,6 +619,57 @@ (ocaml-eval rhs env) (ocaml-make-curried params rhs env)))) (ocaml-eval body (ocaml-env-extend env name rhs-val))))) + ((= tag "let-mut") + ;; (:let-mut BINDINGS BODY) — non-rec multi-binding let-in. + ;; Each rhs evaluated in the parent env, then names bound + ;; sequentially before evaluating BODY. + (let ((bindings (nth ast 1)) (body (nth ast 2)) (env-cur env)) + (begin + (define one + (fn (b) + (let ((nm (nth b 0)) (ps (nth b 1)) (rh (nth b 2))) + (let ((v (if (= (len ps) 0) + (ocaml-eval rh env-cur) + (ocaml-make-curried ps rh env-cur)))) + (set! env-cur (ocaml-env-extend env-cur nm v)))))) + (define loop + (fn (xs) + (when (not (= xs (list))) + (begin (one (first xs)) (loop (rest xs)))))) + (loop bindings) + (ocaml-eval body env-cur)))) + ((= tag "let-rec-mut") + ;; (:let-rec-mut BINDINGS BODY) — mutually-recursive let-in. + (let ((bindings (nth ast 1)) (body (nth ast 2)) + (env2 env) (cells (list))) + (begin + (define alloc + (fn (xs) + (when (not (= xs (list))) + (let ((b (first xs))) + (let ((c (list nil)) (nm (nth b 0))) + (begin + (append! cells c) + (set! env2 (ocaml-env-extend env2 nm + (fn (a) ((nth c 0) a)))) + (alloc (rest xs)))))))) + (alloc bindings) + (let ((idx 0)) + (begin + (define fill + (fn (xs) + (when (not (= xs (list))) + (let ((b (first xs))) + (let ((nm (nth b 0)) (ps (nth b 1)) (rh (nth b 2))) + (let ((v (if (= (len ps) 0) + (ocaml-eval rh env2) + (ocaml-make-curried ps rh env2)))) + (begin + (set-nth! (nth cells idx) 0 v) + (set! idx (+ idx 1)) + (fill (rest xs))))))))) + (fill bindings) + (ocaml-eval body env2)))))) ((= tag "let-rec") ;; Tie the knot via a mutable cell when rhs is function-typed. ;; The placeholder closure dereferences the cell on each call. diff --git a/lib/ocaml/parser.sx b/lib/ocaml/parser.sx index 0ff699d7..67bedc65 100644 --- a/lib/ocaml/parser.sx +++ b/lib/ocaml/parser.sx @@ -666,36 +666,43 @@ (let ((body (parse-expr))) (list :fun params body)))))) (define parse-let - (fn - () - (let - ((reccy false)) + (fn () + (let ((reccy false) (bindings (list))) (begin - (when - (at-kw? "rec") + (when (at-kw? "rec") (begin (advance-tok!) (set! reccy true))) - (let - ((name (ocaml-tok-value (consume! "ident" nil))) - (params (list))) - (begin - (define - collect-params - (fn () - (let ((nm (try-consume-param!))) - (when (not (= nm nil)) - (begin (append! params nm) (collect-params)))))) - (collect-params) - (consume! "op" "=") - (let - ((rhs (parse-expr))) + (define parse-one! + (fn () + (let ((nm (ocaml-tok-value (consume! "ident" nil))) + (ps (list))) (begin - (consume! "keyword" "in") - (let - ((body (parse-expr))) - (if - reccy - (list :let-rec name params rhs body) - (list :let name params rhs body))))))))))) + (define collect-params + (fn () + (let ((p (try-consume-param!))) + (when (not (= p nil)) + (begin (append! ps p) (collect-params)))))) + (collect-params) + (consume! "op" "=") + (let ((rhs (parse-expr))) + (append! bindings (list nm ps rhs))))))) + (parse-one!) + (define more + (fn () + (when (at-kw? "and") + (begin (advance-tok!) (parse-one!) (more))))) + (more) + (consume! "keyword" "in") + (let ((body (parse-expr))) + (cond + ((= (len bindings) 1) + (let ((b (first bindings))) + (if reccy + (list :let-rec (nth b 0) (nth b 1) (nth b 2) body) + (list :let (nth b 0) (nth b 1) (nth b 2) body)))) + (else + (if reccy + (list :let-rec-mut bindings body) + (list :let-mut bindings body))))))))) (define parse-if (fn @@ -975,7 +982,6 @@ ((and (= tt "keyword") (= tv "exception")) (set! done true)) ((and (= tt "keyword") (= tv "open")) (set! done true)) ((and (= tt "keyword") (= tv "include")) (set! done true)) - ((and (= tt "keyword") (= tv "and")) (set! done true)) ((and (= tt "keyword") (= tv "let")) (begin (set! d (+ d 1)) (set! p (+ p 1)) (scan))) ((and (= tt "keyword") (= tv "in")) diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index c494ee9c..86260fea 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -394,6 +394,14 @@ _Newest first._ recognise `!` as the prefix-deref of an application argument, so `String.concat "" (List.rev !b)` parses as `(... (deref b))`. Buffer uses a ref holding a string list; contents reverses and concats. +- 2026-05-08 Phase 5.1+1+2 — calc.ml baseline (11/11 pass) — a + recursive-descent calculator parsing `(1 + 2) * 3 + 4` to 13. Two + parser bugs fixed along the way: parse-let now handles inline + `let rec ... and ... in body` via new `:let-rec-mut` / `:let-mut` + AST shapes (eval supports both); `has-matching-in?` no longer stops + at `and` (which is internal to a let-rec, not a decl boundary). The + baseline exercises mutually-recursive functions, while-loops, and + ref-cell-driven imperative parsing. - 2026-05-08 Phase 5.1 — word_count.ml baseline (10/10 pass). Uses Map.Make(StrOrd) + List.fold_left to count word frequencies; tests the full functor pipeline with a real OCaml idiom. From f05d405bace21fd094f84766514aa8a8c0f485bb Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 17:03:32 +0000 Subject: [PATCH 058/298] ocaml: phase 6 Stack + Queue modules (+5 tests, 430 total) Stack: ref-holding-list LIFO. push/pop/top/length/is_empty/clear. Queue: two-list (front, back) amortised O(1) queue. push/pop/length/ is_empty/clear. Both in OCaml syntax in runtime.sx. --- lib/ocaml/runtime.sx | 42 ++++++++++++++++++++++++++++++++++++++++++ lib/ocaml/test.sh | 19 +++++++++++++++++++ plans/ocaml-on-sx.md | 10 ++++++++++ 3 files changed, 71 insertions(+) diff --git a/lib/ocaml/runtime.sx b/lib/ocaml/runtime.sx index 142eb3f2..29177685 100644 --- a/lib/ocaml/runtime.sx +++ b/lib/ocaml/runtime.sx @@ -366,6 +366,48 @@ let printf fmt = print_string fmt end ;; + module Stack = struct + let create () = ref [] + let push x s = s := x :: !s + let pop s = + match !s with + | [] -> failwith \"Stack.pop: empty\" + | h :: t -> s := t ; h + let top s = + match !s with + | [] -> failwith \"Stack.top: empty\" + | h :: _ -> h + let is_empty s = !s = [] + let length s = List.length !s + let clear s = s := [] + end ;; + + module Queue = struct + (* Simple two-list amortized queue: (front, back). pop drains + front; refill from rev back when front is empty. *) + let create () = ref ([], []) + let push x q = + let p = !q in + q := (List.append [] (match p with (f, b) -> f), x :: (match p with (_, b) -> b)) + let length q = + let p = !q in + let f = match p with (f, _) -> f in + let b = match p with (_, b) -> b in + List.length f + List.length b + let is_empty q = length q = 0 + let pop q = + let p = !q in + let f = match p with (f, _) -> f in + let b = match p with (_, b) -> b in + match f with + | h :: t -> q := (t, b) ; h + | [] -> + (match List.rev b with + | [] -> failwith \"Queue.pop: empty\" + | h :: t -> q := (t, []) ; h) + let clear q = q := ([], []) + end ;; + module Buffer = struct let create _ = ref [] let add_string b s = b := s :: !b diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 23206812..6d467482 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1052,6 +1052,18 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 3502) (eval "(ocaml-run-program \"let b = Buffer.create 16 ;; Buffer.add_string b \\\"x\\\" ;; Buffer.clear b ;; Buffer.contents b\")") +;; ── Stack + Queue modules ───────────────────────────────────── +(epoch 3600) +(eval "(ocaml-run-program \"let s = Stack.create () ;; Stack.push 1 s ;; Stack.push 2 s ;; Stack.push 3 s ;; Stack.pop s\")") +(epoch 3601) +(eval "(ocaml-run-program \"let s = Stack.create () ;; Stack.push 1 s ;; Stack.push 2 s ;; Stack.length s\")") +(epoch 3602) +(eval "(ocaml-run-program \"let s = Stack.create () ;; Stack.push 1 s ;; Stack.top s\")") +(epoch 3603) +(eval "(ocaml-run-program \"let q = Queue.create () ;; Queue.push 1 q ;; Queue.push 2 q ;; Queue.push 3 q ;; Queue.pop q\")") +(epoch 3604) +(eval "(ocaml-run-program \"let q = Queue.create () ;; Queue.push 1 q ;; Queue.push 2 q ;; Queue.length q\")") + EPOCHS OUTPUT=$(timeout 180 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1665,6 +1677,13 @@ check 3500 "Buffer concat 'Hello, World'" '"Hello, World"' check 3501 "Buffer.length 3" '3' check 3502 "Buffer.clear empties" '""' +# ── Stack + Queue ────────────────────────────────────────────── +check 3600 "Stack.pop LIFO" '3' +check 3601 "Stack.length" '2' +check 3602 "Stack.top" '1' +check 3603 "Queue.pop FIFO" '1' +check 3604 "Queue.length" '2' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 86260fea..dd4698bf 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -267,6 +267,11 @@ SX CEK evaluator (both JS and OCaml hosts) - [~] `Buffer`: `create`, `add_string`, `add_char`, `contents`, `length`, `clear`, `reset`. Backed by a ref holding a list of strings; reverse + `String.concat` on `contents`. Mostly-OCaml impl. +- [~] `Stack`: `create`, `push`, `pop`, `top`, `is_empty`, `length`, + `clear`. Backed by a ref-holding-list (LIFO). +- [~] `Queue`: `create`, `push`, `pop`, `is_empty`, `length`, `clear`. + Backed by a `(front, back)` tuple-of-lists pair (amortised O(1) + enqueue/dequeue via list reversal). - [~] `Sys`: `os_type` (`"SX"`), `word_size`, `max_array_length`, `max_string_length`, `executable_name`, `big_endian`, `unix`, `win32`, `cygwin`. Constants only; `argv`/`getenv_opt`/`command` @@ -394,6 +399,11 @@ _Newest first._ recognise `!` as the prefix-deref of an application argument, so `String.concat "" (List.rev !b)` parses as `(... (deref b))`. Buffer uses a ref holding a string list; contents reverses and concats. +- 2026-05-08 Phase 6 — `Stack` and `Queue` modules in OCaml (+5 tests, + 430 total). Stack: ref-holding-list LIFO with push/pop/top/length/ + is_empty/clear. Queue: amortised O(1) two-list `(front, back)` queue + with push/pop/length/is_empty/clear. Both written entirely in OCaml + via lib/ocaml/runtime.sx. - 2026-05-08 Phase 5.1+1+2 — calc.ml baseline (11/11 pass) — a recursive-descent calculator parsing `(1 + 2) * 3 + 4` to 13. Two parser bugs fixed along the way: parse-let now handles inline From 4909ebe2ad59a43a86336cabb8e8ab5e4deef805 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 17:19:16 +0000 Subject: [PATCH 059/298] ocaml: phase 6 Option/Result/Bytes extensions (+9 tests, 439 total) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Option: join, to_result, some, none. Result: value, iter, fold. Bytes: length, get, of_string, to_string, concat, sub — thin alias of String (SX has no separate immutable byte type). Ordering fix: Bytes module placed after String so its closures capture String in scope. Earlier draft put Bytes before String which made String.* lookups fail with 'not a record/module' (treated as nullary ctor). --- lib/ocaml/runtime.sx | 38 ++++++++++++++++++++++++++++++++++++++ lib/ocaml/test.sh | 31 +++++++++++++++++++++++++++++++ plans/ocaml-on-sx.md | 6 ++++++ 3 files changed, 75 insertions(+) diff --git a/lib/ocaml/runtime.sx b/lib/ocaml/runtime.sx index 29177685..52a27d08 100644 --- a/lib/ocaml/runtime.sx +++ b/lib/ocaml/runtime.sx @@ -274,6 +274,19 @@ match o with | None -> [] | Some x -> [x] + + let join oo = + match oo with + | None -> None + | Some inner -> inner + + let to_result none_v o = + match o with + | None -> Error none_v + | Some x -> Ok x + + let some x = Some x + let none = None end ;; module Result = struct @@ -316,6 +329,21 @@ match r with | Ok x -> Some x | Error _ -> None + + let value r default = + match r with + | Ok x -> x + | Error _ -> default + + let iter f r = + match r with + | Ok x -> f x + | Error _ -> () + + let fold ok_f err_f r = + match r with + | Ok x -> ok_f x + | Error e -> err_f e end ;; module String = struct @@ -334,6 +362,16 @@ let index_of s sub = _string_index_of s sub end ;; + module Bytes = struct + (* Thin alias of String — SX has no separate immutable byte type. *) + let length s = String.length s + let get s i = String.get s i + let of_string s = s + let to_string s = s + let concat sep xs = String.concat sep xs + let sub s i n = String.sub s i n + end ;; + module Char = struct let code c = _char_code c let chr n = _char_chr n diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 6d467482..404b707b 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1064,6 +1064,26 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 3604) (eval "(ocaml-run-program \"let q = Queue.create () ;; Queue.push 1 q ;; Queue.push 2 q ;; Queue.length q\")") +;; ── Option/Result/Bytes extensions ──────────────────────────── +(epoch 3700) +(eval "(ocaml-run \"Option.join (Some (Some 5))\")") +(epoch 3701) +(eval "(ocaml-run \"Option.join None\")") +(epoch 3702) +(eval "(ocaml-run \"Option.to_result \\\"missing\\\" None\")") +(epoch 3703) +(eval "(ocaml-run \"Option.to_result \\\"missing\\\" (Some 7)\")") +(epoch 3704) +(eval "(ocaml-run \"Result.value (Ok 5) 0\")") +(epoch 3705) +(eval "(ocaml-run \"Result.value (Error \\\"e\\\") 99\")") +(epoch 3706) +(eval "(ocaml-run \"Result.fold (fun x -> x * 10) (fun e -> 0) (Ok 5)\")") +(epoch 3707) +(eval "(ocaml-run \"Bytes.length \\\"hello\\\"\")") +(epoch 3708) +(eval "(ocaml-run \"Bytes.concat \\\"-\\\" [\\\"a\\\";\\\"b\\\";\\\"c\\\"]\")") + EPOCHS OUTPUT=$(timeout 180 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1684,6 +1704,17 @@ check 3602 "Stack.top" '1' check 3603 "Queue.pop FIFO" '1' check 3604 "Queue.length" '2' +# ── Option/Result/Bytes extensions ───────────────────────────── +check 3700 "Option.join nested" '("Some" 5)' +check 3701 "Option.join None" '("None")' +check 3702 "Option.to_result None" '("Error" "missing")' +check 3703 "Option.to_result Some" '("Ok" 7)' +check 3704 "Result.value Ok" '5' +check 3705 "Result.value Error fallback" '99' +check 3706 "Result.fold Ok" '50' +check 3707 "Bytes.length" '5' +check 3708 "Bytes.concat" '"a-b-c"' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index dd4698bf..742efe40 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -399,6 +399,12 @@ _Newest first._ recognise `!` as the prefix-deref of an application argument, so `String.concat "" (List.rev !b)` parses as `(... (deref b))`. Buffer uses a ref holding a string list; contents reverses and concats. +- 2026-05-08 Phase 6 — Option/Result/Bytes extensions (+9 tests, 439 + total). Option: join, to_result, some, none. Result: value, iter, + fold. Bytes: length, get, of_string, to_string, concat, sub (thin + alias of String — SX has no separate immutable byte type). Ordering + fix: Bytes module placed after String so its closures capture String + in scope. - 2026-05-08 Phase 6 — `Stack` and `Queue` modules in OCaml (+5 tests, 430 total). Stack: ref-holding-list LIFO with push/pop/top/length/ is_empty/clear. Queue: amortised O(1) two-list `(front, back)` queue From cabf5dc9c324068c724619fe7049b26e5540db23 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 17:28:27 +0000 Subject: [PATCH 060/298] ocaml: phase 5 HM let-mut / let-rec-mut (+3 tests, 442 total) ocaml-infer-let-mut: each rhs inferred in parent env, generalized sequentially before adding to body env. ocaml-infer-let-rec-mut: pre-bind all names with fresh tvs; infer each rhs against the joint env, unify each with its tv, then generalize all and infer body. Mutual recursion now type-checks: let rec even n = if n = 0 then true else odd (n - 1) and odd n = if n = 0 then false else even (n - 1) in even : Int -> Bool --- lib/ocaml/infer.sx | 92 ++++++++++++++++++++++++++++++++++++++++++++ lib/ocaml/test.sh | 13 +++++++ plans/ocaml-on-sx.md | 6 +++ 3 files changed, 111 insertions(+) diff --git a/lib/ocaml/infer.sx b/lib/ocaml/infer.sx index 84beebf2..c0f8c0f3 100644 --- a/lib/ocaml/infer.sx +++ b/lib/ocaml/infer.sx @@ -166,6 +166,94 @@ (let ((s2 (get r2 :subst)) (t2 (get r2 :type))) {:subst (hm-compose s2 s1) :type t2})))))))))) +;; let x = e1 and y = e2 in body — non-rec multi-binding; each rhs is +;; inferred against the parent env, then generalized and added to body env. +(define ocaml-infer-let-mut + (fn (bindings body env counter) + (let ((subst {}) (env-cur env)) + (begin + (define one + (fn (b) + (let ((nm (nth b 0)) (ps (nth b 1)) (rh (nth b 2))) + (let ((rhs-expr (cond + ((= (len ps) 0) rh) + (else (list :fun ps rh))))) + (let ((r (ocaml-infer rhs-expr env-cur counter))) + (let ((s (get r :subst)) (t (get r :type))) + (let ((env-after (hm-apply-env s env-cur))) + (let ((scheme (hm-generalize t env-after))) + (begin + (set! subst (hm-compose s subst)) + (set! env-cur (assoc env-after nm scheme))))))))))) + (define loop + (fn (xs) + (when (not (= xs (list))) + (begin (one (first xs)) (loop (rest xs)))))) + (loop bindings) + (let ((rb (ocaml-infer body env-cur counter))) + (let ((sb (get rb :subst)) (tb (get rb :type))) + {:subst (hm-compose sb subst) :type tb})))))) + +;; let rec f = ... and g = ... in body — mutually recursive multi-binding. +;; Pre-bind all names with fresh tvs, infer rhs in joint env, unify with +;; tvs, generalize, infer body. +(define ocaml-infer-let-rec-mut + (fn (bindings body env counter) + (let ((tvs (list)) (env-rec env)) + (begin + (define alloc + (fn (xs) + (when (not (= xs (list))) + (let ((b (first xs))) + (let ((nm (nth b 0)) (tv (hm-fresh-tv counter))) + (begin + (append! tvs tv) + (set! env-rec (assoc env-rec nm (hm-monotype tv))) + (alloc (rest xs)))))))) + (alloc bindings) + (let ((subst {}) (idx 0)) + (begin + (define infer-one + (fn (b) + (let ((ps (nth b 1)) (rh (nth b 2))) + (let ((rhs-expr (cond + ((= (len ps) 0) rh) + (else (list :fun ps rh))))) + (let ((r (ocaml-infer rhs-expr env-rec counter))) + (let ((s (get r :subst)) (t (get r :type))) + (let ((s2 (ocaml-hm-unify + (hm-apply s (nth tvs idx)) + t + (hm-compose s subst)))) + (begin + (set! subst s2) + (set! idx (+ idx 1)))))))))) + (define loop + (fn (xs) + (when (not (= xs (list))) + (begin (infer-one (first xs)) (loop (rest xs)))))) + (loop bindings) + (let ((env-final (hm-apply-env subst env))) + (begin + (set! idx 0) + (define gen-one + (fn (b) + (let ((nm (nth b 0))) + (let ((scheme (hm-generalize + (hm-apply subst (nth tvs idx)) + env-final))) + (begin + (set! env-final (assoc env-final nm scheme)) + (set! idx (+ idx 1))))))) + (define loop2 + (fn (xs) + (when (not (= xs (list))) + (begin (gen-one (first xs)) (loop2 (rest xs)))))) + (loop2 bindings) + (let ((rb (ocaml-infer body env-final counter))) + (let ((sb (get rb :subst)) (tb (get rb :type))) + {:subst (hm-compose sb subst) :type tb})))))))))) + ;; let-rec name params = rhs in body — bind name to a fresh tv before ;; inferring rhs, then unify the inferred rhs type with the tv. This ;; lets rhs reference name (recursive call). Generalize after. @@ -491,6 +579,10 @@ (nth expr 3) (nth expr 4) env counter)) ((= tag "let-rec") (ocaml-infer-let-rec (nth expr 1) (nth expr 2) (nth expr 3) (nth expr 4) env counter)) + ((= tag "let-mut") + (ocaml-infer-let-mut (nth expr 1) (nth expr 2) env counter)) + ((= tag "let-rec-mut") + (ocaml-infer-let-rec-mut (nth expr 1) (nth expr 2) env counter)) ((= tag "if") (ocaml-infer-if (nth expr 1) (nth expr 2) (nth expr 3) env counter)) ((= tag "tuple") (ocaml-infer-tuple (rest expr) env counter)) diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 404b707b..c7345ac0 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1084,6 +1084,14 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 3708) (eval "(ocaml-run \"Bytes.concat \\\"-\\\" [\\\"a\\\";\\\"b\\\";\\\"c\\\"]\")") +;; ── HM let-mut / let-rec-mut ────────────────────────────────── +(epoch 3800) +(eval "(ocaml-type-of \"let x = 1 and y = 2 in x + y\")") +(epoch 3801) +(eval "(ocaml-type-of \"let rec even n = if n = 0 then true else odd (n - 1) and odd n = if n = 0 then false else even (n - 1) in even\")") +(epoch 3802) +(eval "(ocaml-type-of \"let f x = x + 1 and g x = x * 2 in f 1 + g 2\")") + EPOCHS OUTPUT=$(timeout 180 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1715,6 +1723,11 @@ check 3706 "Result.fold Ok" '50' check 3707 "Bytes.length" '5' check 3708 "Bytes.concat" '"a-b-c"' +# ── HM let-mut / let-rec-mut ─────────────────────────────────── +check 3800 "let-mut x+y : Int" '"Int"' +check 3801 "let-rec-mut even" '"Int -> Bool"' +check 3802 "let-mut f and g" '"Int"' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 742efe40..07a57fac 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -399,6 +399,12 @@ _Newest first._ recognise `!` as the prefix-deref of an application argument, so `String.concat "" (List.rev !b)` parses as `(... (deref b))`. Buffer uses a ref holding a string list; contents reverses and concats. +- 2026-05-08 Phase 5 — HM let-mut / let-rec-mut inference (+3 tests, + 442 total). `ocaml-infer-let-mut` infers each rhs in the parent env + and generalizes sequentially; `ocaml-infer-let-rec-mut` pre-binds + all names with fresh tvs, infers each rhs against the joint env, + unifies, generalizes, then infers body. Mutual recursion works: + `let rec even n = ... and odd n = ... in even : Int -> Bool`. - 2026-05-08 Phase 6 — Option/Result/Bytes extensions (+9 tests, 439 total). Option: join, to_result, some, none. Result: value, iter, fold. Bytes: length, get, of_string, to_string, concat, sub (thin From 0bc6dbd2330517ed7dae90d91c0301bcf3c1afbb Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 17:37:00 +0000 Subject: [PATCH 061/298] ocaml: phase 2+6 print primitives wire to host display (+2 tests, 444 total) print_string / print_endline / print_int / print_newline now route to SX display primitive (not the non-existent print/println). print_endline appends '\n'. let _ = expr ;; at top level confirmed working via the wildcard-param parser. --- lib/ocaml/eval.sx | 9 +++++---- lib/ocaml/test.sh | 10 ++++++++++ plans/ocaml-on-sx.md | 7 +++++++ 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/lib/ocaml/eval.sx b/lib/ocaml/eval.sx index d91e522b..ca12fdda 100644 --- a/lib/ocaml/eval.sx +++ b/lib/ocaml/eval.sx @@ -72,10 +72,11 @@ (list "_string_of_float" (fn (f) (str f))) (list "_char_code" (fn (c) (char-code c))) (list "_char_chr" (fn (n) (char-from-code n))) - ;; Print: prints to host stdout via println. - (list "print_string" (fn (s) (begin (print s) nil))) - (list "print_endline" (fn (s) (begin (println s) nil))) - (list "print_int" (fn (i) (begin (print (str i)) nil))) + ;; Print: route to host SX `display` (no automatic newline). + (list "print_string" (fn (s) (begin (display s) nil))) + (list "print_endline" (fn (s) (begin (display s) (display "\n") nil))) + (list "print_int" (fn (i) (begin (display (str i)) nil))) + (list "print_newline" (fn (_) (begin (display "\n") nil))) ;; Float math primitives. (list "_float_sqrt" (fn (x) (sqrt x))) (list "_float_sin" (fn (x) (sin x))) diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index c7345ac0..3c5f2114 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1092,6 +1092,12 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 3802) (eval "(ocaml-type-of \"let f x = x + 1 and g x = x * 2 in f 1 + g 2\")") +;; ── let _ = expr top-level ───────────────────────────────────── +(epoch 3900) +(eval "(ocaml-run-program \"let _ = 1 + 2 ;; 42\")") +(epoch 3901) +(eval "(ocaml-run-program \"let x = 10 ;; let _ = x ;; x * 2\")") + EPOCHS OUTPUT=$(timeout 180 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1728,6 +1734,10 @@ check 3800 "let-mut x+y : Int" '"Int"' check 3801 "let-rec-mut even" '"Int -> Bool"' check 3802 "let-mut f and g" '"Int"' +# ── let _ = expr top-level ───────────────────────────────────── +check 3900 "let _ = 1+2;; 42" '42' +check 3901 "two top-level lets, _" '20' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 07a57fac..6059f979 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -399,6 +399,13 @@ _Newest first._ recognise `!` as the prefix-deref of an application argument, so `String.concat "" (List.rev !b)` parses as `(... (deref b))`. Buffer uses a ref holding a string list; contents reverses and concats. +- 2026-05-08 Phase 2+6 — print primitives wired to host `display` (+2 + tests, 444 total). `print_string` / `print_endline` / `print_int` / + `print_newline` now use SX `display` (no auto-newline) plus an + explicit `"\n"` for endline. Prior version referenced `print`/ + `println` host primitives that don't exist. `let _ = expr ;;` + top-level decl works as expected (already supported by the + wildcard-param parser). - 2026-05-08 Phase 5 — HM let-mut / let-rec-mut inference (+3 tests, 442 total). `ocaml-infer-let-mut` infers each rhs in the parent env and generalizes sequentially; `ocaml-infer-let-rec-mut` pre-binds From d8f1882b505ef7b91e5cc665bc49f37d2c7fd0b3 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 17:44:07 +0000 Subject: [PATCH 062/298] ocaml: phase 5.1 fizzbuzz.ml baseline (12/12 pass) Classic fizzbuzz using ref-cell accumulator, for-loop, mod, if/elseif chain, String.concat, Int.to_string. Output verified via String.length of the comma-joined result for n=15: 57. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/fizzbuzz.ml | 17 +++++++++++++++++ plans/ocaml-on-sx.md | 3 +++ 3 files changed, 21 insertions(+) create mode 100644 lib/ocaml/baseline/fizzbuzz.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 98764167..f277cca4 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -4,6 +4,7 @@ "exception_handle.ml": 4, "expr_eval.ml": 16, "factorial.ml": 3628800, + "fizzbuzz.ml": 57, "list_ops.ml": 30, "module_use.ml": 3, "option_match.ml": 5, diff --git a/lib/ocaml/baseline/fizzbuzz.ml b/lib/ocaml/baseline/fizzbuzz.ml new file mode 100644 index 00000000..86933c01 --- /dev/null +++ b/lib/ocaml/baseline/fizzbuzz.ml @@ -0,0 +1,17 @@ +(* Baseline: fizzbuzz returning a list of strings *) +let fizzbuzz n = + let acc = ref [] in + for i = 1 to n do + let s = + if i mod 15 = 0 then "FizzBuzz" + else if i mod 3 = 0 then "Fizz" + else if i mod 5 = 0 then "Buzz" + else Int.to_string i + in + acc := s :: !acc + done ; + List.rev !acc +;; + +(* Concatenated for a deterministic check value via String.length *) +String.length (String.concat "," (fizzbuzz 15)) diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 6059f979..6e3dc860 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -399,6 +399,9 @@ _Newest first._ recognise `!` as the prefix-deref of an application argument, so `String.concat "" (List.rev !b)` parses as `(... (deref b))`. Buffer uses a ref holding a string list; contents reverses and concats. +- 2026-05-08 Phase 5.1 — fizzbuzz.ml baseline (12/12 pass). Classic + fizzbuzz using ref-cell accumulator, for-loop, mod, if/elseif chain, + String.concat, Int.to_string. Verifies output via String.length. - 2026-05-08 Phase 2+6 — print primitives wired to host `display` (+2 tests, 444 total). `print_string` / `print_endline` / `print_int` / `print_newline` now use SX `display` (no auto-newline) plus an From 0858986877c49c6c72396bbf793e431ad9a8757b Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 17:52:49 +0000 Subject: [PATCH 063/298] ocaml: phase 5.1 btree.ml baseline (13/13 pass) Polymorphic binary search tree with insert + in-order traversal. Exercises parametric ADT (type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree), recursive match, List.append, List.fold_left. --- lib/ocaml/baseline/btree.ml | 25 +++++++++++++++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 4 ++++ 3 files changed, 30 insertions(+) create mode 100644 lib/ocaml/baseline/btree.ml diff --git a/lib/ocaml/baseline/btree.ml b/lib/ocaml/baseline/btree.ml new file mode 100644 index 00000000..19dad101 --- /dev/null +++ b/lib/ocaml/baseline/btree.ml @@ -0,0 +1,25 @@ +(* Baseline: binary search tree with insert + in-order traversal *) +type 'a tree = + | Leaf + | Node of 'a * 'a tree * 'a tree +;; + +let rec insert x t = + match t with + | Leaf -> Node (x, Leaf, Leaf) + | Node (v, l, r) -> + if x < v then Node (v, insert x l, r) + else if x > v then Node (v, l, insert x r) + else t +;; + +let rec inorder t = + match t with + | Leaf -> [] + | Node (v, l, r) -> List.append (inorder l) (v :: inorder r) +;; + +let from_list xs = List.fold_left (fun t x -> insert x t) Leaf xs ;; + +let t = from_list [5; 3; 8; 1; 4; 7; 9; 2] ;; +List.fold_left (fun a b -> a + b) 0 (inorder t) diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index f277cca4..d5b441f4 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -1,4 +1,5 @@ { + "btree.ml": 39, "calc.ml": 13, "closures.ml": 315, "exception_handle.ml": 4, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 6e3dc860..7dd9e206 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -399,6 +399,10 @@ _Newest first._ recognise `!` as the prefix-deref of an application argument, so `String.concat "" (List.rev !b)` parses as `(... (deref b))`. Buffer uses a ref holding a string list; contents reverses and concats. +- 2026-05-08 Phase 5.1 — btree.ml baseline (13/13 pass). Polymorphic + binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * + 'a tree`) with insert + in-order traversal. Tests parametric ADT, + recursive match, List.append, List.fold_left. - 2026-05-08 Phase 5.1 — fizzbuzz.ml baseline (12/12 pass). Classic fizzbuzz using ref-cell accumulator, for-loop, mod, if/elseif chain, String.concat, Int.to_string. Verifies output via String.length. From f070bddb0e6f6fb1b2f95ca116d2d188ae72f375 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 18:12:23 +0000 Subject: [PATCH 064/298] ocaml: phase 5.1 conformance.sh integrates baseline (458/458 across 15 suites) bash lib/ocaml/conformance.sh now runs lib/ocaml/baseline/run.sh and aggregates pass/fail counts under a 'baseline' suite. Full-suite scoreboard now reports both unit-test results and end-to-end OCaml program runs in a single artifact. --- lib/ocaml/conformance.sh | 11 +++++++++++ lib/ocaml/scoreboard.json | 13 +++++++------ lib/ocaml/scoreboard.md | 11 ++++++----- 3 files changed, 24 insertions(+), 11 deletions(-) diff --git a/lib/ocaml/conformance.sh b/lib/ocaml/conformance.sh index 7cc471fa..884deb28 100755 --- a/lib/ocaml/conformance.sh +++ b/lib/ocaml/conformance.sh @@ -72,6 +72,17 @@ while IFS= read -r line; do fi done < "$TMPLOG" +# Run baseline OCaml programs and aggregate into a 'baseline' suite. +if [ -x lib/ocaml/baseline/run.sh ]; then + while IFS= read -r line; do + if [[ "$line" =~ ^[[:space:]]*ok\ ([^[:space:]]+\.ml) ]]; then + SUITE_PASS[baseline]=$(( ${SUITE_PASS[baseline]:-0} + 1 )) + elif [[ "$line" =~ ^[[:space:]]*FAIL\ ([^[:space:]]+\.ml) ]]; then + SUITE_FAIL[baseline]=$(( ${SUITE_FAIL[baseline]:-0} + 1 )) + fi + done < <(bash lib/ocaml/baseline/run.sh 2>/dev/null) +fi + # Pull the final pass/total TOTAL_PASS=0 TOTAL_FAIL=0 diff --git a/lib/ocaml/scoreboard.json b/lib/ocaml/scoreboard.json index 909a4877..82bdcb67 100644 --- a/lib/ocaml/scoreboard.json +++ b/lib/ocaml/scoreboard.json @@ -1,21 +1,22 @@ { "suites": { + "baseline": {"pass": 13, "fail": 0}, "eval-core": {"pass": 51, "fail": 0}, - "let-and": {"pass": 3, "fail": 0}, - "misc": {"pass": 87, "fail": 0}, + "let-and": {"pass": 4, "fail": 0}, + "misc": {"pass": 98, "fail": 0}, "parser": {"pass": 106, "fail": 0}, "phase1-params": {"pass": 2, "fail": 0}, "phase2-exn": {"pass": 8, "fail": 0}, "phase2-function": {"pass": 3, "fail": 0}, "phase2-loops": {"pass": 4, "fail": 0}, "phase2-refs": {"pass": 6, "fail": 0}, - "phase3-adt": {"pass": 27, "fail": 0}, + "phase3-adt": {"pass": 33, "fail": 0}, "phase4-modules": {"pass": 14, "fail": 0}, "phase5-hm": {"pass": 38, "fail": 0}, - "phase6-stdlib": {"pass": 59, "fail": 0}, + "phase6-stdlib": {"pass": 60, "fail": 0}, "tokenize": {"pass": 18, "fail": 0} }, - "total_pass": 426, + "total_pass": 458, "total_fail": 0, - "total": 426 + "total": 458 } diff --git a/lib/ocaml/scoreboard.md b/lib/ocaml/scoreboard.md index 551faa72..7194c2e8 100644 --- a/lib/ocaml/scoreboard.md +++ b/lib/ocaml/scoreboard.md @@ -1,20 +1,21 @@ # OCaml-on-SX scoreboard -426 / 426 tests passing. +458 / 458 tests passing. | Suite | Pass | Fail | |---|---:|---:| +| baseline | 13 | 0 | | eval-core | 51 | 0 | -| let-and | 3 | 0 | -| misc | 87 | 0 | +| let-and | 4 | 0 | +| misc | 98 | 0 | | parser | 106 | 0 | | phase1-params | 2 | 0 | | phase2-exn | 8 | 0 | | phase2-function | 3 | 0 | | phase2-loops | 4 | 0 | | phase2-refs | 6 | 0 | -| phase3-adt | 27 | 0 | +| phase3-adt | 33 | 0 | | phase4-modules | 14 | 0 | | phase5-hm | 38 | 0 | -| phase6-stdlib | 59 | 0 | +| phase6-stdlib | 60 | 0 | | tokenize | 18 | 0 | From 66da0e5b840338ea2e0833bc3efc9a7883b42d90 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 18:26:34 +0000 Subject: [PATCH 065/298] ocaml: phase 1+3 record type declarations (+3 tests, 447 total) type r = { x : int; mutable y : string } parses to (:type-def-record NAME PARAMS FIELDS) with FIELDS each (NAME) or (:mutable NAME). Parser dispatches on { after = to parse field list. Field-type sources are skipped (HM registration TBD). Runtime no-op since records already work as dynamic dicts. --- lib/ocaml/eval.sx | 5 ++ lib/ocaml/parser.sx | 115 +++++++++++++++++++++++++++++-------------- lib/ocaml/test.sh | 13 +++++ plans/ocaml-on-sx.md | 7 +++ 4 files changed, 104 insertions(+), 36 deletions(-) diff --git a/lib/ocaml/eval.sx b/lib/ocaml/eval.sx index ca12fdda..22fa3b2c 100644 --- a/lib/ocaml/eval.sx +++ b/lib/ocaml/eval.sx @@ -780,6 +780,7 @@ (set! env (ocaml-env-extend env mname mod-val)) (set! result (merge result (dict mname mod-val)))))))) ((= tag "type-def") nil) + ((= tag "type-def-record") nil) ((= tag "exception-def") nil) ((= tag "module-type-def") nil) ((= tag "open") @@ -962,6 +963,10 @@ ;; exception E [of T] — purely declarative; raise+match ;; already work on tagged ctor values. nil) + ((= tag "type-def-record") + ;; type r = { x : T; y : T } — runtime no-op; records + ;; are already dynamic dicts. + nil) ((= tag "module-type-def") ;; module type S = sig … end — no-op at runtime. nil) diff --git a/lib/ocaml/parser.sx b/lib/ocaml/parser.sx index 67bedc65..cd9ef2f2 100644 --- a/lib/ocaml/parser.sx +++ b/lib/ocaml/parser.sx @@ -1186,42 +1186,85 @@ (let ((name (ocaml-tok-value (consume! "ident" nil)))) (begin (consume! "op" "=") - (when (at-op? "|") (advance-tok!)) - ;; Parse a sum-type: Ctor [of TYPE [* TYPE]*] (| Ctor …)* - (let ((ctors (list))) - (begin - (define one - (fn () - (let ((cname (ocaml-tok-value (consume! "ctor" nil))) - (arg-srcs (list))) - (begin - (when (at-kw? "of") - (begin - (advance-tok!) - (let ((arg-start (cur-pos))) - (begin - (define skip-type - (fn () - (cond - ((>= idx tok-len) nil) - ((= (ocaml-tok-type (peek-tok)) "eof") nil) - ((at-op? "|") nil) - ((at-op? ";;") nil) - ((at-kw? "let") nil) - ((at-kw? "type") nil) - ((at-kw? "and") nil) - ((at-kw? "module") nil) - (else (begin (advance-tok!) (skip-type)))))) - (skip-type) - (append! arg-srcs (slice src arg-start (cur-pos))))))) - (append! ctors (cons cname arg-srcs)))))) - (one) - (define more - (fn () - (when (at-op? "|") - (begin (advance-tok!) (one) (more))))) - (more) - (list :type-def name tparams ctors))))))))) + (cond + ;; Record type: type NAME = { f1 [: T1]; f2 [: T2]; ... } + ((at-op? "{") + (begin + (advance-tok!) + (let ((fields (list))) + (begin + (define field-one + (fn () + (let ((mut false)) + (begin + (when (at-kw? "mutable") + (begin (advance-tok!) (set! mut true))) + (let ((fname (ocaml-tok-value (consume! "ident" nil)))) + (begin + (when (at-op? ":") + (begin + (advance-tok!) + (define skip-fty + (fn () + (cond + ((>= idx tok-len) nil) + ((= (ocaml-tok-type (peek-tok)) "eof") nil) + ((at-op? ";") nil) + ((at-op? "}") nil) + (else (begin (advance-tok!) (skip-fty)))))) + (skip-fty))) + (append! fields + (if mut + (list :mutable fname) + (list fname))))))))) + (field-one) + (define field-more + (fn () + (when (at-op? ";") + (begin (advance-tok!) + (when (not (at-op? "}")) + (begin (field-one) (field-more))))))) + (field-more) + (consume! "op" "}") + (list :type-def-record name tparams fields))))) + (else + (begin + (when (at-op? "|") (advance-tok!)) + ;; Sum type: Ctor [of TYPE [* TYPE]*] (| Ctor …)* + (let ((ctors (list))) + (begin + (define one + (fn () + (let ((cname (ocaml-tok-value (consume! "ctor" nil))) + (arg-srcs (list))) + (begin + (when (at-kw? "of") + (begin + (advance-tok!) + (let ((arg-start (cur-pos))) + (begin + (define skip-type + (fn () + (cond + ((>= idx tok-len) nil) + ((= (ocaml-tok-type (peek-tok)) "eof") nil) + ((at-op? "|") nil) + ((at-op? ";;") nil) + ((at-kw? "let") nil) + ((at-kw? "type") nil) + ((at-kw? "and") nil) + ((at-kw? "module") nil) + (else (begin (advance-tok!) (skip-type)))))) + (skip-type) + (append! arg-srcs (slice src arg-start (cur-pos))))))) + (append! ctors (cons cname arg-srcs)))))) + (one) + (define more + (fn () + (when (at-op? "|") + (begin (advance-tok!) (one) (more))))) + (more) + (list :type-def name tparams ctors)))))))))))) ;; open M / include M — collect a path Ctor(.SubCtor)* and emit ;; (:open PATH) or (:include PATH). diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 3c5f2114..bcf66f4a 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1098,6 +1098,14 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 3901) (eval "(ocaml-run-program \"let x = 10 ;; let _ = x ;; x * 2\")") +;; ── Record type declarations ────────────────────────────────── +(epoch 4000) +(eval "(ocaml-parse-program \"type point = { x : int; y : int }\")") +(epoch 4001) +(eval "(ocaml-parse-program \"type r = { mutable x : int; y : string }\")") +(epoch 4002) +(eval "(ocaml-run-program \"type point = { x : int; y : int };; let p = { x = 3; y = 4 };; p.x + p.y\")") + EPOCHS OUTPUT=$(timeout 180 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1738,6 +1746,11 @@ check 3802 "let-mut f and g" '"Int"' check 3900 "let _ = 1+2;; 42" '42' check 3901 "two top-level lets, _" '20' +# ── Record type declarations ────────────────────────────────── +check 4000 "record type decl" '("type-def-record" "point" () (("x") ("y")))' +check 4001 "mutable field decl" '("mutable" "x")' +check 4002 "record decl + use" '7' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 7dd9e206..cb036366 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -403,6 +403,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-08 Phase 1+3 — record type declarations `type r = { x : int; + mutable y : string }` (+3 tests, 447 total). Parser dispatches on + `{` after `=` to parse field list (`mutable` keyword tracked). + AST: `(:type-def-record NAME PARAMS FIELDS)` with FIELDS each being + `(NAME)` or `(:mutable NAME)`. Runtime is no-op (records already + work as dynamic dicts). Field-type sources are skipped; HM type + registration deferred. - 2026-05-08 Phase 5.1 — fizzbuzz.ml baseline (12/12 pass). Classic fizzbuzz using ref-cell accumulator, for-loop, mod, if/elseif chain, String.concat, Int.to_string. Verifies output via String.length. From d9979eaf6c2eb7ff06d75ab07ca11d64d0e4b575 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 18:35:31 +0000 Subject: [PATCH 066/298] ocaml: phase 2 mutable record fields r.f <- v (+4 tests, 451 total) <- added to op-table at level 1 (same as :=). Eval short-circuits on <- to mutate the lhs's field via host SX dict-set!. The lhs must be a :field expression; otherwise raises. Tested: let r = { x = 1; y = 2 } in r.x <- 5; r.x (5) let r = { x = 0 } in for i = 1 to 5 do r.x <- r.x + i done; r.x (15) let r = { name = ...; age = 30 } in r.name <- "Alice"; r.name The 'mutable' keyword in record type decls is parsed-and-discarded; runtime semantics: every field is mutable. Phase 2 closes this gap without changing the dict-based record representation. --- lib/ocaml/eval.sx | 12 ++++++++++++ lib/ocaml/parser.sx | 1 + lib/ocaml/test.sh | 16 ++++++++++++++++ plans/ocaml-on-sx.md | 11 +++++++++++ 4 files changed, 40 insertions(+) diff --git a/lib/ocaml/eval.sx b/lib/ocaml/eval.sx index 22fa3b2c..e1b67468 100644 --- a/lib/ocaml/eval.sx +++ b/lib/ocaml/eval.sx @@ -442,6 +442,18 @@ (let ((cell (ocaml-eval (nth ast 2) env)) (new-val (ocaml-eval (nth ast 3) env))) (begin (set-nth! cell 0 new-val) nil))) + ;; <- mutates a record field. The lhs must be a (:field ...). + ((= op "<-") + (let ((lhs-ast (nth ast 2)) (new-val (ocaml-eval (nth ast 3) env))) + (cond + ((= (ocaml-tag-of lhs-ast) "field") + (let ((target (ocaml-eval (nth lhs-ast 1) env)) + (fname (nth lhs-ast 2))) + (begin (dict-set! target fname new-val) nil))) + (else + (error + (str "ocaml-eval: <- expects a field-access lhs, got " + (ocaml-tag-of lhs-ast))))))) (else (ocaml-eval-op op (ocaml-eval (nth ast 2) env) diff --git a/lib/ocaml/parser.sx b/lib/ocaml/parser.sx index cd9ef2f2..70e3ff25 100644 --- a/lib/ocaml/parser.sx +++ b/lib/ocaml/parser.sx @@ -49,6 +49,7 @@ ocaml-op-table (list (list ":=" 1 :right) + (list "<-" 1 :right) (list "||" 2 :right) (list "or" 2 :right) (list "&&" 3 :right) diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index bcf66f4a..b9ad1f6c 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1106,6 +1106,16 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 4002) (eval "(ocaml-run-program \"type point = { x : int; y : int };; let p = { x = 3; y = 4 };; p.x + p.y\")") +;; ── Mutable record fields (r.f <- v) ────────────────────────── +(epoch 4100) +(eval "(ocaml-run \"let r = { x = 1; y = 2 } in r.x <- 5; r.x\")") +(epoch 4101) +(eval "(ocaml-run \"let r = { x = 0 } in for i = 1 to 5 do r.x <- r.x + i done; r.x\")") +(epoch 4102) +(eval "(ocaml-run \"let r = { name = \\\"Bob\\\"; age = 30 } in r.name <- \\\"Alice\\\"; r.name\")") +(epoch 4103) +(eval "(ocaml-run \"let r = { x = 1; y = 2 } in r.x <- r.y * 10; r.x\")") + EPOCHS OUTPUT=$(timeout 180 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1751,6 +1761,12 @@ check 4000 "record type decl" '("type-def-record" "point" () (("x") check 4001 "mutable field decl" '("mutable" "x")' check 4002 "record decl + use" '7' +# ── Mutable record fields ────────────────────────────────────── +check 4100 "r.x <- 5; r.x" '5' +check 4101 "for-loop accum r.x" '15' +check 4102 "r.name <- str" '"Alice"' +check 4103 "r.x <- r.y * 10" '20' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index cb036366..d8d9ac78 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -157,6 +157,10 @@ SX CEK evaluator (both JS and OCaml hosts) - [x] Arithmetic, comparison, boolean ops, string `^`, `mod`. - [x] Unit `()` value; `ignore`. - [x] References: `ref`, `!`, `:=`. +- [x] Mutable record fields via `r.f <- v` — uses host SX `dict-set!` + to mutate the underlying record dict in place. All record fields + are de-facto mutable (the `mutable` keyword in type-decls is + currently parsed-and-discarded). - [ ] Mutable record fields. - [x] `for i = lo to hi do ... done` loop; `while cond do ... done` (incl. `downto` direction). @@ -403,6 +407,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-08 Phase 2 — mutable record fields `r.f <- v` (+4 tests, 451 + total). `<-` added to op-table at level 1 (same as `:=`). Eval + short-circuits on `<-` to mutate the lhs's field via host SX + `dict-set!`. Tested with for-loop accumulator (`for i = 1 to 5 do + r.x <- r.x + i done`) and string-field reassignment. The `mutable` + keyword in record-type decls is parsed-and-discarded; runtime + semantics: every field is mutable. - 2026-05-08 Phase 1+3 — record type declarations `type r = { x : int; mutable y : string }` (+3 tests, 447 total). Parser dispatches on `{` after `=` to parse field list (`mutable` keyword tracked). From 50a219b688831bb1543bfd62184219c7ee0fb505 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 18:43:19 +0000 Subject: [PATCH 067/298] ocaml: phase 5.1 mutable_record.ml baseline (14/14 pass) Counter-style record with two mutable fields. Validates the new r.f <- v field mutation end-to-end through type decl + record literal + field access + field assignment + sequence operator. type counter = { mutable count : int; mutable last : int } let bump c = c.count <- c.count + 1 ; c.last <- c.count After 5 bumps: count=5, last=5, sum=10. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/mutable_record.ml | 15 +++++++++++++++ plans/ocaml-on-sx.md | 4 ++++ 3 files changed, 20 insertions(+) create mode 100644 lib/ocaml/baseline/mutable_record.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index d5b441f4..7bf2e72e 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -8,6 +8,7 @@ "fizzbuzz.ml": 57, "list_ops.ml": 30, "module_use.ml": 3, + "mutable_record.ml": 10, "option_match.ml": 5, "quicksort.ml": 44, "sum_squares.ml": 385, diff --git a/lib/ocaml/baseline/mutable_record.ml b/lib/ocaml/baseline/mutable_record.ml new file mode 100644 index 00000000..41fb5302 --- /dev/null +++ b/lib/ocaml/baseline/mutable_record.ml @@ -0,0 +1,15 @@ +(* Baseline: mutable record fields via r.f <- v *) +type counter = { mutable count : int; mutable last : int } ;; + +let bump c = + c.count <- c.count + 1 ; + c.last <- c.count +;; + +let c = { count = 0; last = 0 } ;; +bump c ;; +bump c ;; +bump c ;; +bump c ;; +bump c ;; +c.count + c.last diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index d8d9ac78..41d171f2 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,10 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-08 Phase 5.1 — mutable_record.ml baseline (14/14 pass). + Counter-style record with two mutable fields, bump function uses + `r.f <- v` to mutate. End-to-end validates type decl + record + literal + field access + field assignment + sequence operator. - 2026-05-08 Phase 2 — mutable record fields `r.f <- v` (+4 tests, 451 total). `<-` added to op-table at level 1 (same as `:=`). Eval short-circuits on `<-` to mutate the lhs's field via host SX From 360a3ed51f31c8208028a9ea03310d56e5fb75eb Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 19:04:04 +0000 Subject: [PATCH 068/298] ocaml: phase 5.1 queens.ml baseline (15/15 pass) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 4-queens via recursive backtracking + List.fold_left. Returns 2 (the two solutions of 4-queens). Per-program timeout in run.sh bumped to 240s — the tree-walking interpreter is slow on heavy recursion but correct. The substrate handles full backtracking + safe-check recursion + list-driven candidate enumeration end-to-end. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/queens.ml | 35 ++++++++++++++++++++++++++++++++ lib/ocaml/baseline/run.sh | 2 +- plans/ocaml-on-sx.md | 6 ++++++ 4 files changed, 43 insertions(+), 1 deletion(-) create mode 100644 lib/ocaml/baseline/queens.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 7bf2e72e..6b51c383 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -10,6 +10,7 @@ "module_use.ml": 3, "mutable_record.ml": 10, "option_match.ml": 5, + "queens.ml": 2, "quicksort.ml": 44, "sum_squares.ml": 385, "word_count.ml": 3 diff --git a/lib/ocaml/baseline/queens.ml b/lib/ocaml/baseline/queens.ml new file mode 100644 index 00000000..2effdb3c --- /dev/null +++ b/lib/ocaml/baseline/queens.ml @@ -0,0 +1,35 @@ +(* Baseline: n-queens count for n=6. + We count placements of n queens on an n×n board such that no two + share a row, column, or diagonal. *) + +let safe q queens = + let rec go qs offset = + match qs with + | [] -> true + | h :: t -> + if h = q then false + else if h - q = offset then false + else if q - h = offset then false + else go t (offset + 1) + in + go queens 1 +;; + +let rec range a b = + if a > b then [] else a :: range (a + 1) b +;; + +let rec solve n queens row = + if row > n then 1 + else + List.fold_left + (fun acc col -> + if safe col queens then + acc + solve n (col :: queens) (row + 1) + else + acc) + 0 + (range 1 n) +;; + +solve 4 [] 1 diff --git a/lib/ocaml/baseline/run.sh b/lib/ocaml/baseline/run.sh index 8d5e75e7..c26fbdb0 100755 --- a/lib/ocaml/baseline/run.sh +++ b/lib/ocaml/baseline/run.sh @@ -36,7 +36,7 @@ for f in lib/ocaml/baseline/*.ml; do (eval "(ocaml-run-program (file-read \"$f\"))") EOF - output=$(timeout 120 "$SX_SERVER" < "$TMP" 2>/dev/null) + output=$(timeout 240 "$SX_SERVER" < "$TMP" 2>/dev/null) rm -f "$TMP" result=$(echo "$output" | awk ' diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 41d171f2..a5fdfeb0 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,12 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-08 Phase 5.1 — queens.ml baseline (15/15 pass). 4-queens + count via recursive backtracking with `List.fold_left`. Returns 2 + (the two solutions of 4-queens). Per-program timeout in run.sh + bumped to 240s — tree-walking interpreter is slow on heavy recursion + but correct. The substrate handles full backtracking + safe-check + recursion + list-driven candidate enumeration end-to-end. - 2026-05-08 Phase 5.1 — mutable_record.ml baseline (14/14 pass). Counter-style record with two mutable fields, bump function uses `r.f <- v` to mutate. End-to-end validates type decl + record From fff8fe2dc8696231a1ccd3c1619de9f939a050b0 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 19:10:49 +0000 Subject: [PATCH 069/298] ocaml: phase 5.1 memo_fib.ml baseline (16/16 pass) Memoized fibonacci using Hashtbl.find_opt + Hashtbl.add. fib(25) = 75025. Demonstrates mutable Hashtbl through the OCaml stdlib API in real recursive code. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/memo_fib.ml | 15 +++++++++++++++ plans/ocaml-on-sx.md | 3 +++ 3 files changed, 19 insertions(+) create mode 100644 lib/ocaml/baseline/memo_fib.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 6b51c383..c4ea6f4e 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -7,6 +7,7 @@ "factorial.ml": 3628800, "fizzbuzz.ml": 57, "list_ops.ml": 30, + "memo_fib.ml": 75025, "module_use.ml": 3, "mutable_record.ml": 10, "option_match.ml": 5, diff --git a/lib/ocaml/baseline/memo_fib.ml b/lib/ocaml/baseline/memo_fib.ml new file mode 100644 index 00000000..d4f4a4a9 --- /dev/null +++ b/lib/ocaml/baseline/memo_fib.ml @@ -0,0 +1,15 @@ +(* Baseline: memoized fibonacci using Hashtbl *) +let cache = Hashtbl.create 16 ;; + +let rec fib n = + if n < 2 then n + else + match Hashtbl.find_opt cache n with + | Some v -> v + | None -> + let v = fib (n - 1) + fib (n - 2) in + Hashtbl.add cache n v ; + v +;; + +fib 25 diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index a5fdfeb0..5deb193c 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,9 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-08 Phase 5.1 — memo_fib.ml baseline (16/16 pass). Memoized + fibonacci using `Hashtbl.find_opt` + `Hashtbl.add`. fib(25) = 75025. + Demonstrates mutable dict semantics through the OCaml stdlib API. - 2026-05-08 Phase 5.1 — queens.ml baseline (15/15 pass). 4-queens count via recursive backtracking with `List.fold_left`. Returns 2 (the two solutions of 4-queens). Per-program timeout in run.sh From 8c7ad62b449447c42c16d0d78f4b734c230654b7 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 19:19:17 +0000 Subject: [PATCH 070/298] ocaml: phase 5 HM def-mut + def-rec-mut at top level (+3 tests, 454 total) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ocaml-type-of-program now handles :def-mut (sequential generalize) and :def-rec-mut (pre-bind tvs, infer rhs, unify, generalize all, infer body — same algorithm as the inline let-rec-mut version). Mutual top-level recursion now type-checks: let rec even n = ... and odd n = ...;; even 10 : Bool let rec map f xs = ... and length lst = ...;; map : ('a -> 'b) -> 'a list -> 'b list --- lib/ocaml/infer.sx | 75 ++++++++++++++++++++++++++++++++++++++++++++ lib/ocaml/test.sh | 13 ++++++++ plans/ocaml-on-sx.md | 5 +++ 3 files changed, 93 insertions(+) diff --git a/lib/ocaml/infer.sx b/lib/ocaml/infer.sx index c0f8c0f3..18582509 100644 --- a/lib/ocaml/infer.sx +++ b/lib/ocaml/infer.sx @@ -661,6 +661,81 @@ (let ((r (ocaml-infer (nth decl 1) env counter))) (set! last-type (hm-apply (get r :subst) (get r :type))))) + ((= tag "def-mut") + ;; let x = e and y = e' (top level, no rec) + (let ((bindings (nth decl 1))) + (begin + (define one + (fn (b) + (let ((nm (nth b 0)) (ps (nth b 1)) (rh (nth b 2))) + (let ((rhs-expr (cond + ((= (len ps) 0) rh) + (else (list :fun ps rh))))) + (let ((r (ocaml-infer rhs-expr env counter))) + (let ((s (get r :subst)) (t (get r :type))) + (let ((env2 (hm-apply-env s env))) + (let ((scheme (hm-generalize t env2))) + (begin + (set! env (assoc env2 nm scheme)) + (set! last-type t)))))))))) + (define loop + (fn (xs) + (when (not (= xs (list))) + (begin (one (first xs)) (loop (rest xs)))))) + (loop bindings)))) + ((= tag "def-rec-mut") + ;; let rec f = ... and g = ... — mutual recursion at top level. + (let ((bindings (nth decl 1)) (tvs (list)) (env-rec env)) + (begin + (define alloc + (fn (xs) + (when (not (= xs (list))) + (let ((b (first xs))) + (let ((nm (nth b 0)) (tv (hm-fresh-tv counter))) + (begin + (append! tvs tv) + (set! env-rec (assoc env-rec nm (hm-monotype tv))) + (alloc (rest xs)))))))) + (alloc bindings) + (let ((subst {}) (idx 0)) + (begin + (define infer-one + (fn (b) + (let ((ps (nth b 1)) (rh (nth b 2))) + (let ((rhs-expr (cond + ((= (len ps) 0) rh) + (else (list :fun ps rh))))) + (let ((r (ocaml-infer rhs-expr env-rec counter))) + (let ((s (get r :subst)) (t (get r :type))) + (let ((s2 (ocaml-hm-unify + (hm-apply s (nth tvs idx)) + t + (hm-compose s subst)))) + (begin + (set! subst s2) + (set! idx (+ idx 1)) + (set! last-type (hm-apply s2 t)))))))))) + (define loop2 + (fn (xs) + (when (not (= xs (list))) + (begin (infer-one (first xs)) (loop2 (rest xs)))))) + (loop2 bindings) + (set! env (hm-apply-env subst env)) + (set! idx 0) + (define gen-one + (fn (b) + (let ((nm (nth b 0))) + (let ((scheme (hm-generalize + (hm-apply subst (nth tvs idx)) + env))) + (begin + (set! env (assoc env nm scheme)) + (set! idx (+ idx 1))))))) + (define loop3 + (fn (xs) + (when (not (= xs (list))) + (begin (gen-one (first xs)) (loop3 (rest xs)))))) + (loop3 bindings)))))) (else nil))))) (define loop (fn (xs) diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index b9ad1f6c..1bde32a3 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1116,6 +1116,14 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 4103) (eval "(ocaml-run \"let r = { x = 1; y = 2 } in r.x <- r.y * 10; r.x\")") +;; ── HM top-level def-mut / def-rec-mut ──────────────────────── +(epoch 4200) +(eval "(ocaml-type-of-program \"let x = 1 and y = 2;; x + y\")") +(epoch 4201) +(eval "(ocaml-type-of-program \"let rec even n = if n = 0 then true else odd (n - 1) and odd n = if n = 0 then false else even (n - 1);; even 10\")") +(epoch 4202) +(eval "(ocaml-type-of-program \"let rec map f xs = match xs with | [] -> [] | h :: t -> f h :: map f t and length lst = match lst with | [] -> 0 | _ :: t -> 1 + length t;; map\")") + EPOCHS OUTPUT=$(timeout 180 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1767,6 +1775,11 @@ check 4101 "for-loop accum r.x" '15' check 4102 "r.name <- str" '"Alice"' check 4103 "r.x <- r.y * 10" '20' +# ── HM top-level def-mut / def-rec-mut ───────────────────────── +check 4200 "let-mut x+y" '"Int"' +check 4201 "let-rec-mut even 10" '"Bool"' +check 4202 "let-rec-mut map+length" 'list -> ' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 5deb193c..d7ccc61d 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,11 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-08 Phase 5 — HM for top-level `let..and..` decls (+3 + tests, 454 total). `ocaml-type-of-program` now handles `:def-mut` + (sequential generalization) and `:def-rec-mut` (mutual recursion + with shared tvs) decls. Mutual `even`/`odd` and `map`/`length` + type-check at top level. - 2026-05-08 Phase 5.1 — memo_fib.ml baseline (16/16 pass). Memoized fibonacci using `Hashtbl.find_opt` + `Hashtbl.add`. fib(25) = 75025. Demonstrates mutable dict semantics through the OCaml stdlib API. From ce81ce2e95df1c69ff448ca811a97705013dfb9b Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 19:42:00 +0000 Subject: [PATCH 071/298] ocaml: phase 6 Char predicates (+7 tests, 461 total) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Char.is_digit / is_alpha / is_alnum / is_whitespace / is_upper / is_lower / is_space — all written in OCaml using Char.code + ASCII range checks. --- lib/ocaml/runtime.sx | 10 ++++++++++ lib/ocaml/test.sh | 25 +++++++++++++++++++++++++ plans/ocaml-on-sx.md | 3 +++ 3 files changed, 38 insertions(+) diff --git a/lib/ocaml/runtime.sx b/lib/ocaml/runtime.sx index 52a27d08..d1ceec8f 100644 --- a/lib/ocaml/runtime.sx +++ b/lib/ocaml/runtime.sx @@ -377,6 +377,16 @@ let chr n = _char_chr n let lowercase_ascii c = _string_lower c let uppercase_ascii c = _string_upper c + let is_digit c = + let n = _char_code c in n >= 48 && n <= 57 + let is_lower c = + let n = _char_code c in n >= 97 && n <= 122 + let is_upper c = + let n = _char_code c in n >= 65 && n <= 90 + let is_alpha c = is_lower c || is_upper c + let is_alnum c = is_alpha c || is_digit c + let is_whitespace c = + c = \" \" || c = \"\\t\" || c = \"\\n\" || c = \"\\r\" end ;; module Int = struct diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 1bde32a3..b86b1c14 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1124,6 +1124,22 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 4202) (eval "(ocaml-type-of-program \"let rec map f xs = match xs with | [] -> [] | h :: t -> f h :: map f t and length lst = match lst with | [] -> 0 | _ :: t -> 1 + length t;; map\")") +;; ── Char predicate helpers ──────────────────────────────────── +(epoch 4300) +(eval "(ocaml-run \"Char.is_digit \\\"5\\\"\")") +(epoch 4301) +(eval "(ocaml-run \"Char.is_digit \\\"x\\\"\")") +(epoch 4302) +(eval "(ocaml-run \"Char.is_alpha \\\"x\\\"\")") +(epoch 4303) +(eval "(ocaml-run \"Char.is_alnum \\\"5\\\"\")") +(epoch 4304) +(eval "(ocaml-run \"Char.is_whitespace \\\" \\\"\")") +(epoch 4305) +(eval "(ocaml-run \"Char.is_upper \\\"A\\\"\")") +(epoch 4306) +(eval "(ocaml-run \"Char.is_lower \\\"a\\\"\")") + EPOCHS OUTPUT=$(timeout 180 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1780,6 +1796,15 @@ check 4200 "let-mut x+y" '"Int"' check 4201 "let-rec-mut even 10" '"Bool"' check 4202 "let-rec-mut map+length" 'list -> ' +# ── Char predicates ──────────────────────────────────────────── +check 4300 "Char.is_digit 5" 'true' +check 4301 "Char.is_digit x" 'false' +check 4302 "Char.is_alpha x" 'true' +check 4303 "Char.is_alnum 5" 'true' +check 4304 "Char.is_whitespace ' '" 'true' +check 4305 "Char.is_upper A" 'true' +check 4306 "Char.is_lower a" 'true' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index d7ccc61d..b404aad6 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,9 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-08 Phase 6 — Char predicates: is_digit/is_alpha/is_alnum/ + is_whitespace/is_upper/is_lower (+7 tests, 461 total). All written + in OCaml in runtime.sx using Char.code + ASCII range checks. - 2026-05-08 Phase 5 — HM for top-level `let..and..` decls (+3 tests, 454 total). `ocaml-type-of-program` now handles `:def-mut` (sequential generalization) and `:def-rec-mut` (mutual recursion From de8b1dd6812f7351efec298f487b6b5a9439207d Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 19:49:08 +0000 Subject: [PATCH 072/298] ocaml: phase 5.1 lambda_calc.ml baseline (17/17 pass) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Untyped lambda calculus interpreter inside OCaml-on-SX: type term = Var | Abs of string * term | App | Num of int type value = VNum of int | VClos of string * term * env let rec eval env t = match t with ... (\x.\y.x) 7 99 = 7. The substrate handles two ADTs, recursive eval, closure-based env, and pattern matching all written as a single self-contained OCaml program — strong validation. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/lambda_calc.ml | 41 +++++++++++++++++++++++++++++++ plans/ocaml-on-sx.md | 6 +++++ 3 files changed, 48 insertions(+) create mode 100644 lib/ocaml/baseline/lambda_calc.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index c4ea6f4e..27ecf27b 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -7,6 +7,7 @@ "factorial.ml": 3628800, "fizzbuzz.ml": 57, "list_ops.ml": 30, + "lambda_calc.ml": 7, "memo_fib.ml": 75025, "module_use.ml": 3, "mutable_record.ml": 10, diff --git a/lib/ocaml/baseline/lambda_calc.ml b/lib/ocaml/baseline/lambda_calc.ml new file mode 100644 index 00000000..7944be4e --- /dev/null +++ b/lib/ocaml/baseline/lambda_calc.ml @@ -0,0 +1,41 @@ +(* Baseline: untyped lambda calculus with closures over a Hashtbl env *) +type term = + | Var of string + | Abs of string * term + | App of term * term + | Num of int +;; + +type value = + | VNum of int + | VClos of string * term * (string * value) list +;; + +let rec lookup name env = + match env with + | [] -> failwith "unbound" + | (n, v) :: t -> if n = name then v else lookup name t +;; + +let rec eval env t = + match t with + | Num n -> VNum n + | Var x -> lookup x env + | Abs (x, body) -> VClos (x, body, env) + | App (f, a) -> + let fv = eval env f in + let av = eval env a in + (match fv with + | VClos (param, body, captured) -> + eval ((param, av) :: captured) body + | _ -> failwith "not a function") +;; + +let unwrap v = match v with VNum n -> n | _ -> failwith "not a number" ;; + +(* (\x. \y. x) 7 99 = 7 *) +let term = + App (App (Abs ("x", Abs ("y", Var "x")), Num 7), Num 99) +;; + +unwrap (eval [] term) diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index b404aad6..879a0226 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,12 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-08 Phase 5.1 — lambda_calc.ml baseline (17/17 pass). Untyped + lambda calculus interpreter using two ADTs (`type term = Var | Abs | + App | Num`, `type value = VNum | VClos`), an env as `(string * value) + list`, and recursive eval. `(\x.\y.x) 7 99 = 7` end-to-end. Demonstrates + the substrate handles a non-trivial AST + closure-based evaluator + written in OCaml-on-SX. - 2026-05-08 Phase 6 — Char predicates: is_digit/is_alpha/is_alnum/ is_whitespace/is_upper/is_lower (+7 tests, 461 total). All written in OCaml in runtime.sx using Char.code + ASCII range checks. From 8fab20c8bcf5b568ab5aac1801915d4a7ac3d2c7 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 19:57:09 +0000 Subject: [PATCH 073/298] ocaml: phase 5.1 anagrams.ml baseline (18/18 pass) Group anagrams by canonical (sorted-chars) key using Hashtbl + List.sort. Demonstrates char-by-char traversal via String.get + for-loop + ref accumulator + Hashtbl as a multi-valued counter. --- lib/ocaml/baseline/anagrams.ml | 26 ++++++++++++++++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 3 +++ 3 files changed, 30 insertions(+) create mode 100644 lib/ocaml/baseline/anagrams.ml diff --git a/lib/ocaml/baseline/anagrams.ml b/lib/ocaml/baseline/anagrams.ml new file mode 100644 index 00000000..a1f4666a --- /dev/null +++ b/lib/ocaml/baseline/anagrams.ml @@ -0,0 +1,26 @@ +(* Baseline: count anagram groups using Hashtbl + sort *) + +(* Sort the chars in a string to get its anagram-equivalence key *) +let canonical s = + let n = String.length s in + let chars = ref [] in + for i = 0 to n - 1 do + chars := (String.get s i) :: !chars + done ; + let sorted = List.sort compare !chars in + String.concat "" sorted +;; + +let count_groups words = + let counts = Hashtbl.create 16 in + List.iter + (fun w -> + let k = canonical w in + match Hashtbl.find_opt counts k with + | None -> Hashtbl.add counts k 1 + | Some n -> Hashtbl.replace counts k (n + 1)) + words ; + Hashtbl.length counts +;; + +count_groups ["eat"; "tea"; "tan"; "ate"; "nat"; "bat"] diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 27ecf27b..c22a2ed9 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -1,4 +1,5 @@ { + "anagrams.ml": 3, "btree.ml": 39, "calc.ml": 13, "closures.ml": 315, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 879a0226..b1fa4cb7 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,9 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-08 Phase 5.1 — anagrams.ml baseline (18/18 pass). Counts + anagram-equivalence groups via Hashtbl + List.sort + String.get + + for-loop. `["eat";"tea";"tan";"ate";"nat";"bat"]` → 3 groups. - 2026-05-08 Phase 5.1 — lambda_calc.ml baseline (17/17 pass). Untyped lambda calculus interpreter using two ADTs (`type term = Var | Abs | App | Num`, `type value = VNum | VClos`), an env as `(string * value) From b92a98fb453ec33e3933115fb08a0dc324d0acc6 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 20:12:35 +0000 Subject: [PATCH 074/298] ocaml: refresh scoreboard (480/480 across 15 suites incl. 18 baseline programs) --- lib/ocaml/scoreboard.json | 16 ++++++++-------- lib/ocaml/scoreboard.md | 14 +++++++------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/lib/ocaml/scoreboard.json b/lib/ocaml/scoreboard.json index 82bdcb67..69bf4049 100644 --- a/lib/ocaml/scoreboard.json +++ b/lib/ocaml/scoreboard.json @@ -1,22 +1,22 @@ { "suites": { - "baseline": {"pass": 13, "fail": 0}, + "baseline": {"pass": 18, "fail": 0}, "eval-core": {"pass": 51, "fail": 0}, - "let-and": {"pass": 4, "fail": 0}, - "misc": {"pass": 98, "fail": 0}, + "let-and": {"pass": 5, "fail": 0}, + "misc": {"pass": 105, "fail": 0}, "parser": {"pass": 106, "fail": 0}, "phase1-params": {"pass": 2, "fail": 0}, "phase2-exn": {"pass": 8, "fail": 0}, "phase2-function": {"pass": 3, "fail": 0}, - "phase2-loops": {"pass": 4, "fail": 0}, + "phase2-loops": {"pass": 5, "fail": 0}, "phase2-refs": {"pass": 6, "fail": 0}, "phase3-adt": {"pass": 33, "fail": 0}, "phase4-modules": {"pass": 14, "fail": 0}, - "phase5-hm": {"pass": 38, "fail": 0}, - "phase6-stdlib": {"pass": 60, "fail": 0}, + "phase5-hm": {"pass": 39, "fail": 0}, + "phase6-stdlib": {"pass": 67, "fail": 0}, "tokenize": {"pass": 18, "fail": 0} }, - "total_pass": 458, + "total_pass": 480, "total_fail": 0, - "total": 458 + "total": 480 } diff --git a/lib/ocaml/scoreboard.md b/lib/ocaml/scoreboard.md index 7194c2e8..b09d6da1 100644 --- a/lib/ocaml/scoreboard.md +++ b/lib/ocaml/scoreboard.md @@ -1,21 +1,21 @@ # OCaml-on-SX scoreboard -458 / 458 tests passing. +480 / 480 tests passing. | Suite | Pass | Fail | |---|---:|---:| -| baseline | 13 | 0 | +| baseline | 18 | 0 | | eval-core | 51 | 0 | -| let-and | 4 | 0 | -| misc | 98 | 0 | +| let-and | 5 | 0 | +| misc | 105 | 0 | | parser | 106 | 0 | | phase1-params | 2 | 0 | | phase2-exn | 8 | 0 | | phase2-function | 3 | 0 | -| phase2-loops | 4 | 0 | +| phase2-loops | 5 | 0 | | phase2-refs | 6 | 0 | | phase3-adt | 33 | 0 | | phase4-modules | 14 | 0 | -| phase5-hm | 38 | 0 | -| phase6-stdlib | 60 | 0 | +| phase5-hm | 39 | 0 | +| phase6-stdlib | 67 | 0 | | tokenize | 18 | 0 | From 029c1783f456e138826545815ef01cd2443c1aa2 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 20:26:28 +0000 Subject: [PATCH 075/298] ocaml: phase 1+3 'when' guard in 'function | pat -> body' (+3 tests, 464 total) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit parse-function now consumes optional 'when GUARD-EXPR' before -> and emits (:case-when PAT GUARD BODY) — same handling as match clauses. function-style sign extraction now works: (function | n when n > 0 -> 1 | n when n < 0 -> -1 | _ -> 0) --- lib/ocaml/parser.sx | 13 ++++++++++--- lib/ocaml/test.sh | 13 +++++++++++++ plans/ocaml-on-sx.md | 3 +++ 3 files changed, 26 insertions(+), 3 deletions(-) diff --git a/lib/ocaml/parser.sx b/lib/ocaml/parser.sx index 70e3ff25..cfbff29f 100644 --- a/lib/ocaml/parser.sx +++ b/lib/ocaml/parser.sx @@ -787,7 +787,7 @@ (list :try expr cases))))))) (define parse-function (fn () - ;; `function | pat -> body | …` ≡ fun x -> match x with | pat -> body + ;; `function | pat [when GUARD] -> body | …` (let () (begin (when (at-op? "|") (advance-tok!)) @@ -795,11 +795,18 @@ (begin (define one (fn () - (let ((p (parse-pattern))) + (let ((p (parse-pattern)) (guard nil)) (begin + (when (at-kw? "when") + (begin (advance-tok!) + (set! guard (parse-expr-no-seq)))) (consume! "op" "->") (let ((body (parse-expr))) - (append! cases (list :case p body))))))) + (cond + ((= guard nil) + (append! cases (list :case p body))) + (else + (append! cases (list :case-when p guard body))))))))) (one) (define loop (fn () diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index b86b1c14..1501d238 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1140,6 +1140,14 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 4306) (eval "(ocaml-run \"Char.is_lower \\\"a\\\"\")") +;; ── function with `when` guard ──────────────────────────────── +(epoch 4400) +(eval "(ocaml-run \"(function | n when n > 0 -> 1 | _ -> 0) 5\")") +(epoch 4401) +(eval "(ocaml-run \"(function | n when n > 0 -> 1 | _ -> 0) (-3)\")") +(epoch 4402) +(eval "(ocaml-run \"(function | n when n > 0 -> 1 | n when n < 0 -> -1 | _ -> 0) 0\")") + EPOCHS OUTPUT=$(timeout 180 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1805,6 +1813,11 @@ check 4304 "Char.is_whitespace ' '" 'true' check 4305 "Char.is_upper A" 'true' check 4306 "Char.is_lower a" 'true' +# ── function with `when` guard ───────────────────────────────── +check 4400 "function when 5" '1' +check 4401 "function when -3" '0' +check 4402 "function sign 0" '0' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index b1fa4cb7..7d0d292e 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,9 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-08 Phase 1+3 — `function | pat when GUARD -> body | …` + guard support (+3 tests, 464 total). `parse-function` mirrors the + match-clause when-handling. - 2026-05-08 Phase 5.1 — anagrams.ml baseline (18/18 pass). Counts anagram-equivalence groups via Hashtbl + List.sort + String.get + for-loop. `["eat";"tea";"tan";"ate";"nat";"bat"]` → 3 groups. From c7d8b7dd6276845788f5bbdb9c26c2707536778b Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 20:36:02 +0000 Subject: [PATCH 076/298] ocaml: phase 2+3 'when' guard in try/with (+3 tests, 467 total) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit parse-try now consumes optional 'when GUARD-EXPR' before -> and emits (:case-when PAT GUARD BODY). Eval try clause loop dispatches on case / case-when and falls through on guard false — same semantics as match. Examples: try raise (E 5) with | E n when n > 0 -> n | _ -> 0 = 5 try raise (E (-3)) with | E n when n > 0 -> n | _ -> 0 = 0 try raise (E 5) with | E n when n > 100 -> n | E n -> n + 1000 = 1005 --- lib/ocaml/eval.sx | 32 +++++++++++++++++++++++--------- lib/ocaml/parser.sx | 11 +++++++++-- lib/ocaml/test.sh | 13 +++++++++++++ plans/ocaml-on-sx.md | 4 ++++ 4 files changed, 49 insertions(+), 11 deletions(-) diff --git a/lib/ocaml/eval.sx b/lib/ocaml/eval.sx index e1b67468..87e55790 100644 --- a/lib/ocaml/eval.sx +++ b/lib/ocaml/eval.sx @@ -592,8 +592,9 @@ (loop))))) nil))) ((= tag "try") - ;; (:try EXPR CLAUSES) — evaluate EXPR; if it raises, match the - ;; raised value against CLAUSES. Re-raise on no-match. + ;; (:try EXPR CLAUSES) — evaluate EXPR; if it raises, match + ;; the raised value against CLAUSES (case + case-when). + ;; Re-raise on no-match. (let ((expr (nth ast 1)) (clauses (nth ast 2)) (env-cap env)) (guard (e (else @@ -604,13 +605,26 @@ ((empty? cs) (raise e)) (else (let ((clause (first cs))) - (let ((pat (nth clause 1)) - (body (nth clause 2))) - (let ((env2 (ocaml-match-pat pat e env-cap))) - (cond - ((= env2 ocaml-match-fail) - (try-clauses (rest cs))) - (else (ocaml-eval body env2)))))))))) + (let ((ctag (nth clause 0))) + (cond + ((= ctag "case") + (let ((pat (nth clause 1)) + (body (nth clause 2))) + (let ((env2 (ocaml-match-pat pat e env-cap))) + (cond + ((= env2 ocaml-match-fail) + (try-clauses (rest cs))) + (else (ocaml-eval body env2)))))) + ((= ctag "case-when") + (let ((pat (nth clause 1)) + (g (nth clause 2)) + (body (nth clause 3))) + (let ((env2 (ocaml-match-pat pat e env-cap))) + (cond + ((= env2 ocaml-match-fail) (try-clauses (rest cs))) + ((not (ocaml-eval g env2)) (try-clauses (rest cs))) + (else (ocaml-eval body env2)))))) + (else (raise e))))))))) (try-clauses clauses)))) (ocaml-eval expr env-cap)))) ((= tag "while") diff --git a/lib/ocaml/parser.sx b/lib/ocaml/parser.sx index cfbff29f..e425d6e4 100644 --- a/lib/ocaml/parser.sx +++ b/lib/ocaml/parser.sx @@ -773,11 +773,18 @@ (begin (define one (fn () - (let ((p (parse-pattern))) + (let ((p (parse-pattern)) (guard nil)) (begin + (when (at-kw? "when") + (begin (advance-tok!) + (set! guard (parse-expr-no-seq)))) (consume! "op" "->") (let ((body (parse-expr))) - (append! cases (list :case p body))))))) + (cond + ((= guard nil) + (append! cases (list :case p body))) + (else + (append! cases (list :case-when p guard body))))))))) (one) (define loop (fn () diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 1501d238..bb87b15e 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1148,6 +1148,14 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 4402) (eval "(ocaml-run \"(function | n when n > 0 -> 1 | n when n < 0 -> -1 | _ -> 0) 0\")") +;; ── try/with `when` guard ───────────────────────────────────── +(epoch 4500) +(eval "(ocaml-run \"try raise (E 5) with | E n when n > 0 -> n | _ -> 0\")") +(epoch 4501) +(eval "(ocaml-run \"try raise (E (-3)) with | E n when n > 0 -> n | _ -> 0\")") +(epoch 4502) +(eval "(ocaml-run \"try raise (E 5) with | E n when n > 100 -> n | E n -> n + 1000\")") + EPOCHS OUTPUT=$(timeout 180 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1818,6 +1826,11 @@ check 4400 "function when 5" '1' check 4401 "function when -3" '0' check 4402 "function sign 0" '0' +# ── try/with `when` guard ────────────────────────────────────── +check 4500 "try when guard fires" '5' +check 4501 "try when guard skips" '0' +check 4502 "try when fall through" '1005' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 7d0d292e..64922541 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,10 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-08 Phase 2+3 — `try ... with | pat when GUARD -> body` guard + support (+3 tests, 467 total). parse-try mirrors match/function; + eval-try clause loop now dispatches on `case`/`case-when` and falls + through to next clause when guard is false. - 2026-05-08 Phase 1+3 — `function | pat when GUARD -> body | …` guard support (+3 tests, 464 total). `parse-function` mirrors the match-clause when-handling. From ce75bd684865e017a3d4bc22251911195db02d7f Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 20:49:26 +0000 Subject: [PATCH 077/298] ocaml: phase 1+5.1 type aliases + poly_stack baseline (+3 tests, 469 / 19 baseline) Parser: in parse-decl-type, dispatch on the post-= token: '|' or Ctor -> sum type '{' -> record type otherwise -> type alias (skip to boundary) AST (:type-alias NAME PARAMS) with body discarded. Runtime no-op since SX has no nominal types. poly_stack.ml baseline exercises: module type ELEMENT = sig type t val show : t -> string end module IntElem = struct type t = int let show x = ... end module Make (E : ELEMENT) = struct ... use E.show ... end module IntStack = Make(IntElem) Demonstrates the substrate handles signature decls + abstract types + functor parameter with sig constraint. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/poly_stack.ml | 27 +++++++++++++++++++++++++++ lib/ocaml/eval.sx | 4 ++++ lib/ocaml/parser.sx | 23 +++++++++++++++++++++++ lib/ocaml/test.sh | 10 ++++++++++ plans/ocaml-on-sx.md | 7 +++++++ 6 files changed, 72 insertions(+) create mode 100644 lib/ocaml/baseline/poly_stack.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index c22a2ed9..2c99498f 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -13,6 +13,7 @@ "module_use.ml": 3, "mutable_record.ml": 10, "option_match.ml": 5, + "poly_stack.ml": 5, "queens.ml": 2, "quicksort.ml": 44, "sum_squares.ml": 385, diff --git a/lib/ocaml/baseline/poly_stack.ml b/lib/ocaml/baseline/poly_stack.ml new file mode 100644 index 00000000..fde6b6df --- /dev/null +++ b/lib/ocaml/baseline/poly_stack.ml @@ -0,0 +1,27 @@ +(* Baseline: polymorphic stack via functor over an Element module *) +module type ELEMENT = sig type t val show : t -> string end ;; + +module IntElem = struct + type t = int + let show x = Int.to_string x +end ;; + +module Make (E : ELEMENT) = struct + let create () = ref [] + let push x s = s := x :: !s + let pop s = + match !s with + | [] -> None + | h :: t -> s := t ; Some h + let length s = List.length !s + let to_string s = + String.concat "," (List.map E.show !s) +end ;; + +module IntStack = Make(IntElem) ;; + +let s = IntStack.create () ;; +IntStack.push 1 s ;; +IntStack.push 2 s ;; +IntStack.push 3 s ;; +String.length (IntStack.to_string s) diff --git a/lib/ocaml/eval.sx b/lib/ocaml/eval.sx index 87e55790..778e0d16 100644 --- a/lib/ocaml/eval.sx +++ b/lib/ocaml/eval.sx @@ -807,6 +807,7 @@ (set! result (merge result (dict mname mod-val)))))))) ((= tag "type-def") nil) ((= tag "type-def-record") nil) + ((= tag "type-alias") nil) ((= tag "exception-def") nil) ((= tag "module-type-def") nil) ((= tag "open") @@ -993,6 +994,9 @@ ;; type r = { x : T; y : T } — runtime no-op; records ;; are already dynamic dicts. nil) + ((= tag "type-alias") + ;; type t = SomeType — runtime no-op (no nominal types). + nil) ((= tag "module-type-def") ;; module type S = sig … end — no-op at runtime. nil) diff --git a/lib/ocaml/parser.sx b/lib/ocaml/parser.sx index e425d6e4..3abdd84a 100644 --- a/lib/ocaml/parser.sx +++ b/lib/ocaml/parser.sx @@ -1242,6 +1242,29 @@ (field-more) (consume! "op" "}") (list :type-def-record name tparams fields))))) + ;; Type alias: type t = int / type t = 'a list / etc. + ;; Detected when next token is NOT `|` and NOT a ctor. + ((and (not (at-op? "|")) + (not (= (ocaml-tok-type (peek-tok)) "ctor"))) + (begin + ;; Skip the alias source up to the next boundary. + (define skip-alias + (fn () + (cond + ((>= idx tok-len) nil) + ((= (ocaml-tok-type (peek-tok)) "eof") nil) + ((at-op? ";;") nil) + ((at-kw? "let") nil) + ((at-kw? "type") nil) + ((at-kw? "and") nil) + ((at-kw? "module") nil) + ((at-kw? "exception") nil) + ((at-kw? "open") nil) + ((at-kw? "include") nil) + ((at-kw? "end") nil) + (else (begin (advance-tok!) (skip-alias)))))) + (skip-alias) + (list :type-alias name tparams))) (else (begin (when (at-op? "|") (advance-tok!)) diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index bb87b15e..88ef8c59 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1156,6 +1156,12 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 4502) (eval "(ocaml-run \"try raise (E 5) with | E n when n > 100 -> n | E n -> n + 1000\")") +;; ── type aliases ────────────────────────────────────────────── +(epoch 4600) +(eval "(ocaml-parse-program \"type t = int\")") +(epoch 4601) +(eval "(ocaml-run-program \"type t = int;; 42\")") + EPOCHS OUTPUT=$(timeout 180 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1831,6 +1837,10 @@ check 4500 "try when guard fires" '5' check 4501 "try when guard skips" '0' check 4502 "try when fall through" '1005' +# ── type aliases ─────────────────────────────────────────────── +check 4600 "type t = int parses" '("type-alias" "t" ())' +check 4601 "type alias decl + use" '42' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 64922541..f03f97fa 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-08 Phase 1+5.1 — type aliases + poly_stack baseline (+3 + tests, 469 total + 19 baseline). Parser dispatch on the post-`=` + token: `|` or `Ctor` → sum, `{` → record, otherwise → alias (skip + to boundary). AST `(:type-alias NAME PARAMS)` with body discarded. + Runtime no-op. poly_stack.ml baseline exercises a functor whose + parameter has `type t = int` (record alias) + `let show : t -> + string`. Stack uses ref + module field lookup to format ints. - 2026-05-08 Phase 2+3 — `try ... with | pat when GUARD -> body` guard support (+3 tests, 467 total). parse-try mirrors match/function; eval-try clause loop now dispatches on `case`/`case-when` and falls From a4ef9a8ec9f9126c7c99d3a70cd1c5ffc584234a Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 20:58:50 +0000 Subject: [PATCH 078/298] ocaml: phase 1 type annotations on let / (e : T) (+4 tests, 473 total) let NAME [PARAMS] : T = expr and (expr : T) parse and skip the type source. Runtime no-op since SX is dynamic. Works in inline let, top-level let, and parenthesised expressions: let x : int = 5 ;; x + 1 -> 6 let f (x : int) : int = x + 1 in f 41 -> 42 (5 : int) -> 5 ((1 + 2) : int) * 3 -> 9 --- lib/ocaml/parser.sx | 40 +++++++++++++++++++++++++++++++++++++++- lib/ocaml/test.sh | 16 ++++++++++++++++ plans/ocaml-on-sx.md | 5 +++++ 3 files changed, 60 insertions(+), 1 deletion(-) diff --git a/lib/ocaml/parser.sx b/lib/ocaml/parser.sx index 3abdd84a..c5aabfa8 100644 --- a/lib/ocaml/parser.sx +++ b/lib/ocaml/parser.sx @@ -422,7 +422,21 @@ (else (let ((e (parse-expr))) - (begin (consume! "op" ")") e)))))) + (begin + ;; Optional type annotation `(e : T)` — skip + ;; the type source before `)`. + (when (at-op? ":") + (begin + (advance-tok!) + (define skip-pty + (fn () + (cond + ((>= idx tok-len) nil) + ((= (ocaml-tok-type (peek-tok)) "eof") nil) + ((at-op? ")") nil) + (else (begin (advance-tok!) (skip-pty)))))) + (skip-pty))) + (consume! "op" ")") e)))))) ((and (= tt "op") (= tv "[")) (begin (advance-tok!) @@ -683,6 +697,18 @@ (when (not (= p nil)) (begin (append! ps p) (collect-params)))))) (collect-params) + ;; Optional type annotation: skip `: TYPE` before `=`. + (when (at-op? ":") + (begin + (advance-tok!) + (define skip-tann + (fn () + (cond + ((>= idx tok-len) nil) + ((= (ocaml-tok-type (peek-tok)) "eof") nil) + ((at-op? "=") nil) + (else (begin (advance-tok!) (skip-tann)))))) + (skip-tann))) (consume! "op" "=") (let ((rhs (parse-expr))) (append! bindings (list nm ps rhs))))))) @@ -1101,6 +1127,18 @@ (collect-params))) (else nil)))) (collect-params) + ;; Optional type annotation: skip `: TYPE` before `=`. + (when (at-op? ":") + (begin + (advance-tok!) + (define skip-tann + (fn () + (cond + ((>= idx tok-len) nil) + ((= (ocaml-tok-type (peek-tok)) "eof") nil) + ((at-op? "=") nil) + (else (begin (advance-tok!) (skip-tann)))))) + (skip-tann))) (consume! "op" "=") (let ((expr-start (cur-pos))) (begin diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 88ef8c59..3b5edbef 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1162,6 +1162,16 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 4601) (eval "(ocaml-run-program \"type t = int;; 42\")") +;; ── Type annotations: let x : T = e and (e : T) ────────────── +(epoch 4700) +(eval "(ocaml-run-program \"let x : int = 5;; x + 1\")") +(epoch 4701) +(eval "(ocaml-run \"let f (x : int) : int = x + 1 in f 41\")") +(epoch 4702) +(eval "(ocaml-run \"(5 : int)\")") +(epoch 4703) +(eval "(ocaml-run \"((1 + 2) : int) * 3\")") + EPOCHS OUTPUT=$(timeout 180 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1841,6 +1851,12 @@ check 4502 "try when fall through" '1005' check 4600 "type t = int parses" '("type-alias" "t" ())' check 4601 "type alias decl + use" '42' +# ── Type annotations ─────────────────────────────────────────── +check 4700 "let x : int = 5" '6' +check 4701 "let f (x : int) : int" '42' +check 4702 "(5 : int)" '5' +check 4703 "((1+2) : int) * 3" '9' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index f03f97fa..74a565b6 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,11 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-08 Phase 1 — type annotations on let-bindings and parens + expressions (+4 tests, 473 total). `let NAME [PARAMS] : T = expr` + and `(expr : T)` parse and skip the type source. Runtime no-op + (dynamic). Works in inline let, top-level let, and parenthesised + expressions: `let f (x : int) : int = x + 1 in f 41`. - 2026-05-08 Phase 1+5.1 — type aliases + poly_stack baseline (+3 tests, 469 total + 19 baseline). Parser dispatch on the post-`=` token: `|` or `Ctor` → sum, `{` → record, otherwise → alias (skip From 6d9ac1e55abed22a3c4c867b3d4bb00fc704550b Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 21:05:32 +0000 Subject: [PATCH 079/298] ocaml: phase 5.1 bfs.ml baseline (20/20 pass) Graph BFS using Queue + Hashtbl visited-set + List.assoc_opt + List.iter. Returns 6 for a graph where A reaches B/C/D/E/F. Demonstrates 4 stdlib modules (Queue, Hashtbl, List) cooperating in a real algorithm. --- lib/ocaml/baseline/bfs.ml | 43 ++++++++++++++++++++++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 4 +++ 3 files changed, 48 insertions(+) create mode 100644 lib/ocaml/baseline/bfs.ml diff --git a/lib/ocaml/baseline/bfs.ml b/lib/ocaml/baseline/bfs.ml new file mode 100644 index 00000000..5f5fb66e --- /dev/null +++ b/lib/ocaml/baseline/bfs.ml @@ -0,0 +1,43 @@ +(* Baseline: graph BFS using Queue + Hashtbl visited set. + Returns the count of reachable nodes. *) + +(* Adjacency as an assoc list of (node, neighbors). *) +let graph = + [ ("A", ["B"; "C"]) + ; ("B", ["D"]) + ; ("C", ["D"; "E"]) + ; ("D", ["F"]) + ; ("E", ["F"]) + ; ("F", []) + ] +;; + +let neighbors n = + match List.assoc_opt n graph with + | None -> [] + | Some ns -> ns +;; + +let bfs start = + let visited = Hashtbl.create 16 in + let q = Queue.create () in + Queue.push start q ; + Hashtbl.add visited start true ; + let rec loop () = + if Queue.is_empty q then () + else + let v = Queue.pop q in + List.iter + (fun n -> + if not (Hashtbl.mem visited n) then begin + Hashtbl.add visited n true ; + Queue.push n q + end) + (neighbors v) ; + loop () + in + loop () ; + Hashtbl.length visited +;; + +bfs "A" diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 2c99498f..7ddf40cd 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -1,5 +1,6 @@ { "anagrams.ml": 3, + "bfs.ml": 6, "btree.ml": 39, "calc.ml": 13, "closures.ml": 315, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 74a565b6..db6ce1b4 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,10 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-08 Phase 5.1 — bfs.ml baseline (20/20 pass). Graph + breadth-first search using Queue + Hashtbl visited-set + List.assoc_opt + + List.iter. Returns the count of reachable nodes (6 for the demo + graph A→B→D→F, A→C→{D,E}, E→F). - 2026-05-08 Phase 1 — type annotations on let-bindings and parens expressions (+4 tests, 473 total). `let NAME [PARAMS] : T = expr` and `(expr : T)` parse and skip the type source. Runtime no-op From 0530120bc713331281f02fd6c2170f1f6b6ae84b Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 21:14:07 +0000 Subject: [PATCH 080/298] ocaml: phase 4 def-mut / def-rec-mut inside modules (+2 tests, 475 total) ocaml-eval-module now handles :def-mut and :def-rec-mut decls so 'module M = struct let rec a n = ... and b n = ... end' works. The def-rec-mut version uses cell-based mutual recursion exactly as the top-level version. --- lib/ocaml/eval.sx | 49 ++++++++++++++++++++++++++++++++++++++++++++ lib/ocaml/test.sh | 10 +++++++++ plans/ocaml-on-sx.md | 4 ++++ 3 files changed, 63 insertions(+) diff --git a/lib/ocaml/eval.sx b/lib/ocaml/eval.sx index 778e0d16..58c323dd 100644 --- a/lib/ocaml/eval.sx +++ b/lib/ocaml/eval.sx @@ -782,6 +782,55 @@ (begin (set! env (ocaml-env-extend env name v)) (set! result (merge result (dict name v)))))))))) + ((= tag "def-mut") + ;; let x = ... and y = ... — sequential top-level binds. + (let ((bs (nth decl 1))) + (begin + (define run-one + (fn (b) + (let ((nm (nth b 0)) (ps (nth b 1)) (rh (nth b 2))) + (let ((v (if (= (len ps) 0) + (ocaml-eval rh env) + (ocaml-make-curried ps rh env)))) + (begin + (set! env (ocaml-env-extend env nm v)) + (set! result (merge result (dict nm v)))))))) + (define loop + (fn (xs) + (when (not (= xs (list))) + (begin (run-one (first xs)) (loop (rest xs)))))) + (loop bs)))) + ((= tag "def-rec-mut") + ;; let rec f = ... and g = ... — mutual recursion. + (let ((bs (nth decl 1)) (cells (list))) + (begin + (define alloc + (fn (xs) + (when (not (= xs (list))) + (let ((b (first xs))) + (let ((c (list nil)) (nm (nth b 0))) + (begin + (append! cells c) + (set! env (ocaml-env-extend env nm + (fn (a) ((nth c 0) a)))) + (alloc (rest xs)))))))) + (alloc bs) + (let ((idx 0)) + (begin + (define fill + (fn (xs) + (when (not (= xs (list))) + (let ((b (first xs))) + (let ((nm (nth b 0)) (ps (nth b 1)) (rh (nth b 2))) + (let ((v (if (= (len ps) 0) + (ocaml-eval rh env) + (ocaml-make-curried ps rh env)))) + (begin + (set-nth! (nth cells idx) 0 v) + (set! result (merge result (dict nm v))) + (set! idx (+ idx 1)) + (fill (rest xs))))))))) + (fill bs)))))) ((= tag "expr") (ocaml-eval (nth decl 1) env)) ((= tag "module-def") diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 3b5edbef..8499f101 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1172,6 +1172,12 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 4703) (eval "(ocaml-run \"((1 + 2) : int) * 3\")") +;; ── Module body: def-mut / def-rec-mut ───────────────────────── +(epoch 4800) +(eval "(ocaml-run-program \"module M = struct let rec a n = if n = 0 then 0 else b (n - 1) and b n = if n = 0 then 1 else a (n - 1) end ;; M.a 5\")") +(epoch 4801) +(eval "(ocaml-run-program \"module M = struct let x = 1 and y = 2 end ;; M.x + M.y\")") + EPOCHS OUTPUT=$(timeout 180 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1857,6 +1863,10 @@ check 4701 "let f (x : int) : int" '42' check 4702 "(5 : int)" '5' check 4703 "((1+2) : int) * 3" '9' +# ── Module body: def-mut / def-rec-mut ───────────────────────── +check 4800 "module rec a/b mutual" '1' +check 4801 "module x and y" '3' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index db6ce1b4..0ebacfc1 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,10 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-08 Phase 4 — `:def-mut` / `:def-rec-mut` inside module + bodies (+2 tests, 475 total). `ocaml-eval-module` now handles + multi-binding `let .. and ..` decls. `module M = struct let rec a n = + ... and b n = ... end` works. - 2026-05-08 Phase 5.1 — bfs.ml baseline (20/20 pass). Graph breadth-first search using Queue + Hashtbl visited-set + List.assoc_opt + List.iter. Returns the count of reachable nodes (6 for the demo From 6dc535dde33584dca9ead72854212e1194b7fcea Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 21:33:14 +0000 Subject: [PATCH 081/298] ocaml: phase 4 'let open M in body' local opens (+3 tests, 478 total) Parser detects 'let open' as a separate let-form, parses M as a path (Ctor(.Ctor)*) directly via inline AST construction (no source slicing since cur-pos is only available in ocaml-parse-program), and emits (:let-open PATH BODY). Eval resolves the path to a module dict and merges its bindings into the env for body evaluation. Now: let open List in map (fun x -> x * 2) [1;2;3] = [2;4;6] let open Option in map (fun x -> x + 1) (Some 5) = Some 6 --- lib/ocaml/eval.sx | 8 ++++++++ lib/ocaml/parser.sx | 33 ++++++++++++++++++++++++++++++++- lib/ocaml/test.sh | 13 +++++++++++++ plans/ocaml-on-sx.md | 6 ++++++ 4 files changed, 59 insertions(+), 1 deletion(-) diff --git a/lib/ocaml/eval.sx b/lib/ocaml/eval.sx index 58c323dd..6a22b22f 100644 --- a/lib/ocaml/eval.sx +++ b/lib/ocaml/eval.sx @@ -697,6 +697,14 @@ (fill (rest xs))))))))) (fill bindings) (ocaml-eval body env2)))))) + ((= tag "let-open") + ;; `let open M in body` — extend env with M's bindings, eval body. + (let ((path-expr (nth ast 1)) (body (nth ast 2))) + (let ((mod-val (ocaml-resolve-module-path path-expr env))) + (cond + ((dict? mod-val) + (ocaml-eval body (ocaml-env-merge-dict env mod-val))) + (else (error (str "ocaml-eval: let open on non-module: " mod-val))))))) ((= tag "let-rec") ;; Tie the knot via a mutable cell when rhs is function-typed. ;; The placeholder closure dereferences the cell on each call. diff --git a/lib/ocaml/parser.sx b/lib/ocaml/parser.sx index c5aabfa8..42ab481a 100644 --- a/lib/ocaml/parser.sx +++ b/lib/ocaml/parser.sx @@ -682,6 +682,37 @@ (define parse-let (fn () + ;; `let open M in body` — local open. Detect early so the + ;; rest of the let-handler doesn't try to parse `open` as + ;; an ident name. + (cond + ((at-kw? "open") + (begin + (advance-tok!) + ;; Read path as Ctor(.Ctor)* and build :field-chain AST. + (let ((path nil)) + (begin + (when (= (ocaml-tok-type (peek-tok)) "ctor") + (begin + (set! path (list :con (ocaml-tok-value (peek-tok)))) + (advance-tok!))) + (define more + (fn () + (when (and (at-op? ".") + (= (ocaml-tok-type + (nth tokens (+ idx 1))) "ctor")) + (begin + (advance-tok!) ;; . + (let ((nm (ocaml-tok-value (peek-tok)))) + (begin + (advance-tok!) + (set! path (list :field path nm)))) + (more))))) + (more) + (consume! "keyword" "in") + (let ((body (parse-expr))) + (list :let-open path body)))))) + (else (let ((reccy false) (bindings (list))) (begin (when (at-kw? "rec") @@ -729,7 +760,7 @@ (else (if reccy (list :let-rec-mut bindings body) - (list :let-mut bindings body))))))))) + (list :let-mut bindings body))))))))))) (define parse-if (fn diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 8499f101..0f515f1c 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1178,6 +1178,14 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 4801) (eval "(ocaml-run-program \"module M = struct let x = 1 and y = 2 end ;; M.x + M.y\")") +;; ── let open M in body ──────────────────────────────────────── +(epoch 4900) +(eval "(ocaml-run \"let open List in length [1;2;3]\")") +(epoch 4901) +(eval "(ocaml-run \"let open List in map (fun x -> x * 2) [1;2;3]\")") +(epoch 4902) +(eval "(ocaml-run \"let open Option in map (fun x -> x + 1) (Some 5)\")") + EPOCHS OUTPUT=$(timeout 180 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1867,6 +1875,11 @@ check 4703 "((1+2) : int) * 3" '9' check 4800 "module rec a/b mutual" '1' check 4801 "module x and y" '3' +# ── let open M in body ───────────────────────────────────────── +check 4900 "let open List; length" '3' +check 4901 "let open List; map" '(2 4 6)' +check 4902 "let open Option; map" '("Some" 6)' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 0ebacfc1..ba21a0e8 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,12 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-08 Phase 4 — `let open M in body` local opens (+3 tests, 478 + total). Parser detects `let open` as a separate let-form, parses M + as a path (Ctor(.Ctor)*), and emits `(:let-open PATH BODY)`. Eval + resolves the path to a module dict and merges its bindings into the + env for body evaluation. `let open List in map (fun x -> x * 2) + [1;2;3]` → `[2;4;6]`. - 2026-05-08 Phase 4 — `:def-mut` / `:def-rec-mut` inside module bodies (+2 tests, 475 total). `ocaml-eval-module` now handles multi-binding `let .. and ..` decls. `module M = struct let rec a n = From 982e9680fe02d484797899fb338d60ac9a67f31c Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 21:43:38 +0000 Subject: [PATCH 082/298] ocaml: phase 4 'M.(expr)' local-open expression form (+3 tests, 481 total) In parse-atom-postfix, after consuming '.', if the next token is '(', parse the inner expression and emit (:let-open M EXPR) instead of :field. Cleanly composes with the existing :let-open evaluator and loops to allow chained dot postfixes. List.(length [1;2;3]) = 3 List.(map (fun x -> x + 1) [1;2;3]) = [2;3;4] Option.(map (fun x -> x * 10) (Some 4)) = Some 40 --- lib/ocaml/parser.sx | 21 ++++++++++++++++----- lib/ocaml/test.sh | 13 +++++++++++++ plans/ocaml-on-sx.md | 6 ++++++ 3 files changed, 35 insertions(+), 5 deletions(-) diff --git a/lib/ocaml/parser.sx b/lib/ocaml/parser.sx index 42ab481a..642bc607 100644 --- a/lib/ocaml/parser.sx +++ b/lib/ocaml/parser.sx @@ -562,11 +562,22 @@ (when (at-op? ".") (begin (advance-tok!) - (let ((tok (peek-tok))) - (begin - (advance-tok!) - (set! head (list :field head (ocaml-tok-value tok))) - (loop))))))) + (cond + ((at-op? "(") + (begin + (advance-tok!) + (let ((inner (parse-expr))) + (begin + (consume! "op" ")") + (set! head (list :let-open head inner)) + (loop))))) + (else + (let ((tok (peek-tok))) + (begin + (advance-tok!) + (set! head (list :field head + (ocaml-tok-value tok))) + (loop))))))))) (loop) head)))) (set! diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 0f515f1c..59e816f5 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1186,6 +1186,14 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 4902) (eval "(ocaml-run \"let open Option in map (fun x -> x + 1) (Some 5)\")") +;; ── M.(expr) local-open expression form ─────────────────────── +(epoch 4910) +(eval "(ocaml-run \"List.(length [1;2;3])\")") +(epoch 4911) +(eval "(ocaml-run \"List.(map (fun x -> x + 1) [1;2;3])\")") +(epoch 4912) +(eval "(ocaml-run \"Option.(map (fun x -> x * 10) (Some 4))\")") + EPOCHS OUTPUT=$(timeout 180 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1880,6 +1888,11 @@ check 4900 "let open List; length" '3' check 4901 "let open List; map" '(2 4 6)' check 4902 "let open Option; map" '("Some" 6)' +# ── M.(expr) local-open expression form ────────────────────────── +check 4910 "M.(expr) length" '3' +check 4911 "M.(expr) map" '(2 3 4)' +check 4912 "M.(expr) Option map" '("Some" 40)' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index ba21a0e8..1b8ec2b9 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,12 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-08 Phase 4 — `M.(expr)` local-open expression form (+3 + tests, 481 total). Implemented in parse-atom-postfix: after + consuming `.`, if next token is `(`, parse the inner expression and + emit `(:let-open M EXPR)` instead of `:field`. Cleanly composes with + existing `:let-open` evaluator. `List.(length [1;2;3])` → 3, + `Option.(map (fun x -> x * 10) (Some 4))` → Some 40. - 2026-05-08 Phase 4 — `let open M in body` local opens (+3 tests, 478 total). Parser detects `let open` as a separate let-form, parses M as a path (Ctor(.Ctor)*), and emits `(:let-open PATH BODY)`. Eval From bc4f4a5477544dea330ac7e0a401af5c36766876 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 23:40:36 +0000 Subject: [PATCH 083/298] ocaml: phase 5.1 roman.ml baseline + top-level 'let () = expr' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Side-quest emerged from adding roman.ml baseline (Roman numeral greedy encoding): top-level 'let () = expr' was unsupported because ocaml-parse-program's parse-decl-let consumed an ident strictly. Now parse-decl-let recognises a leading '()' as a unit binding and synthesises a __unit_NN name (matching how parse-let already handles inner-let unit patterns). roman.ml exercises: * tuple list literal [(int * string); ...] * recursive pattern match on tuple-cons * String.length + List.fold_left * the new top-level let () support (sanity in a comment, even though the program ends with a bare expression for the test harness) Bumped lib/ocaml/test.sh server timeout 180->360s — the recent surge in test count plus a CPU-contended host was crowding out the sole epoch reaching the deeper smarts. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/roman.ml | 20 ++++++++++++++++++++ lib/ocaml/parser.sx | 11 ++++++++++- lib/ocaml/test.sh | 2 +- plans/ocaml-on-sx.md | 9 +++++++++ 5 files changed, 41 insertions(+), 2 deletions(-) create mode 100644 lib/ocaml/baseline/roman.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 7ddf40cd..86ceac8d 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -17,6 +17,7 @@ "poly_stack.ml": 5, "queens.ml": 2, "quicksort.ml": 44, + "roman.ml": 44, "sum_squares.ml": 385, "word_count.ml": 3 } diff --git a/lib/ocaml/baseline/roman.ml b/lib/ocaml/baseline/roman.ml new file mode 100644 index 00000000..5dec490d --- /dev/null +++ b/lib/ocaml/baseline/roman.ml @@ -0,0 +1,20 @@ +let to_roman n = + let pairs = [ + (1000, "M"); (900, "CM"); (500, "D"); (400, "CD"); + (100, "C"); (90, "XC"); (50, "L"); (40, "XL"); + (10, "X"); (9, "IX"); (5, "V"); (4, "IV"); (1, "I") + ] in + let rec aux n pairs acc = + match pairs with + | [] -> acc + | (v, s) :: rest -> + if n >= v then aux (n - v) pairs (acc ^ s) + else aux n rest acc + in + aux n pairs "" +;; + +List.fold_left + (fun acc n -> acc + String.length (to_roman n)) + 0 + [1; 4; 9; 14; 49; 99; 444; 1994; 3888] diff --git a/lib/ocaml/parser.sx b/lib/ocaml/parser.sx index 642bc607..c686a257 100644 --- a/lib/ocaml/parser.sx +++ b/lib/ocaml/parser.sx @@ -1147,7 +1147,16 @@ (when (at-kw? "rec") (begin (advance-tok!) (set! reccy true))) (define parse-one! (fn () - (let ((nm (ocaml-tok-value (consume! "ident" nil))) + (let ((nm (cond + ((and (at-op? "(") + (< (+ idx 1) tok-len) + (let ((t1 (nth tokens (+ idx 1)))) + (and (= (ocaml-tok-type t1) "op") + (= (ocaml-tok-value t1) ")")))) + (begin (advance-tok!) (advance-tok!) + (str "__unit_" idx))) + (else + (ocaml-tok-value (consume! "ident" nil))))) (ps (list))) (begin (define collect-params diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 59e816f5..cdaf806b 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1196,7 +1196,7 @@ cat > "$TMPFILE" << 'EPOCHS' EPOCHS -OUTPUT=$(timeout 180 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) +OUTPUT=$(timeout 360 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) check() { local epoch="$1" desc="$2" expected="$3" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 1b8ec2b9..1dec9472 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,15 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-08 Phase 5.1 — roman.ml baseline (Roman numeral greedy + encoding). Side-quest: top-level `let () = expr` was unsupported by + ocaml-parse-program — now parse-decl-let recognises `()` as a unit + binding (`__unit_NN` synthetic name), matching the inner-let handling + in parse-let. roman.ml uses recursive pattern match on + `(int * string) list` greedy table + `List.fold_left + String.length` + to compute the cumulative length of 9 encoded numbers (44). + Bumped test.sh server timeout 180→360s for headroom on contended + systems. - 2026-05-08 Phase 4 — `M.(expr)` local-open expression form (+3 tests, 481 total). Implemented in parse-atom-postfix: after consuming `.`, if next token is `(`, parse the inner expression and From f895a118fbb4229f3a9ed77ad028a2f32d885758 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 23:58:37 +0000 Subject: [PATCH 084/298] ocaml: phase 4 's.[i]' string indexing syntax (+3 tests, 484 total) parse-atom-postfix now dispatches three cases after consuming '.': .field -> existing field/module access .(EXPR) -> existing local-open .[EXPR] -> new string-get syntax (this commit) Eval reduces (:string-get S I) to host (nth S I), which already returns a one-character string for OCaml's char model. Lets us write idiomatic OCaml string traversal: let s = "hi" in let n = ref 0 in for i = 0 to String.length s - 1 do n := !n + Char.code s.[i] done; !n (* = 209 *) --- lib/ocaml/eval.sx | 5 +++++ lib/ocaml/parser.sx | 8 ++++++++ lib/ocaml/test.sh | 13 +++++++++++++ plans/ocaml-on-sx.md | 6 ++++++ 4 files changed, 32 insertions(+) diff --git a/lib/ocaml/eval.sx b/lib/ocaml/eval.sx index 6a22b22f..83111b9a 100644 --- a/lib/ocaml/eval.sx +++ b/lib/ocaml/eval.sx @@ -557,6 +557,11 @@ ((dict? target) (get target fname)) (else (error (str "ocaml-eval: not a record/module on .field: " target))))))) + ((= tag "string-get") + ;; (:string-get S I) — evaluate s.[i] as a char access. + (let ((s (ocaml-eval (nth ast 1) env)) + (i (ocaml-eval (nth ast 2) env))) + (nth s i))) ((= tag "for") ;; (:for NAME LO HI DIR BODY) — DIR is "ascend" or "descend". (let ((name (nth ast 1)) diff --git a/lib/ocaml/parser.sx b/lib/ocaml/parser.sx index c686a257..6b0f4abd 100644 --- a/lib/ocaml/parser.sx +++ b/lib/ocaml/parser.sx @@ -571,6 +571,14 @@ (consume! "op" ")") (set! head (list :let-open head inner)) (loop))))) + ((at-op? "[") + (begin + (advance-tok!) + (let ((idx-expr (parse-expr))) + (begin + (consume! "op" "]") + (set! head (list :string-get head idx-expr)) + (loop))))) (else (let ((tok (peek-tok))) (begin diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index cdaf806b..47fac6a2 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1194,6 +1194,14 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 4912) (eval "(ocaml-run \"Option.(map (fun x -> x * 10) (Some 4))\")") +;; ── s.[i] string indexing ───────────────────────────────────── +(epoch 4920) +(eval "(ocaml-run \"let s = \\\"hello\\\" in s.[0]\")") +(epoch 4921) +(eval "(ocaml-run \"let s = \\\"abc\\\" in Char.code s.[2]\")") +(epoch 4922) +(eval "(ocaml-run \"let s = \\\"hi\\\" in let n = ref 0 in for i = 0 to String.length s - 1 do n := !n + Char.code s.[i] done; !n\")") + EPOCHS OUTPUT=$(timeout 360 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1893,6 +1901,11 @@ check 4910 "M.(expr) length" '3' check 4911 "M.(expr) map" '(2 3 4)' check 4912 "M.(expr) Option map" '("Some" 40)' +# ── s.[i] string indexing ──────────────────────────────────────── +check 4920 "s.[0] hello" '"h"' +check 4921 "Char.code s.[2] abc" '99' +check 4922 "for i s.[i] sum hi" '209' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 1dec9472..ab531d58 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,12 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-08 Phase 4 — `s.[i]` string indexing syntax (+3 tests, 484 + total). parse-atom-postfix now handles `.[expr]` after `.`, + emitting `(:string-get S I)`; eval reduces to host `(nth s i)`. + Pairs with the existing `M.(expr)` and `.field` postfixes — all three + share one dot loop. `let s = "hi" in for i = 0 to String.length s - + 1 do n := !n + Char.code s.[i] done; !n` returns 209 (h+i). - 2026-05-08 Phase 5.1 — roman.ml baseline (Roman numeral greedy encoding). Side-quest: top-level `let () = expr` was unsupported by ocaml-parse-program — now parse-decl-let recognises `()` as a unit From 0234ae329ede164c89d1756f0a734eb444ebdb32 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 00:13:11 +0000 Subject: [PATCH 085/298] ocaml: phase 5.1 caesar.ml baseline (ROT13 + s.[i] + Char ops) Side-quests required to land caesar.ml: 1. Top-level 'let r = expr in body' is now an expression decl, not a broken decl-let. ocaml-parse-program's dispatch now checks has-matching-in? at every top-level let; if matched, slices via skip-let-rhs-boundary (which already opens depth on a leading let with matching in) and ocaml-parse on the slice, wrapping as :expr. 2. runtime.sx: added String.make / String.init / String.map. Used by caesar.ml's encode = String.init n (fun i -> shift_char s.[i] k). 3. baseline run.sh per-program timeout 240->480s (system load on the shared host frequently exceeds 240s for large baselines). caesar.ml exercises: * the new top-level let-in expression dispatch * s.[i] string indexing * Char.code / Char.chr round-trip math * String.init with a closure that captures k Test value: Char.code r.[0] + Char.code r.[4] after ROT13(ROT13('hello')) = 104 + 111 = 215. --- lib/ocaml/baseline/caesar.ml | 14 ++++++++++++++ lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/run.sh | 2 +- lib/ocaml/parser.sx | 17 ++++++++++++++++- lib/ocaml/runtime.sx | 14 ++++++++++++++ plans/ocaml-on-sx.md | 11 +++++++++++ 6 files changed, 57 insertions(+), 2 deletions(-) create mode 100644 lib/ocaml/baseline/caesar.ml diff --git a/lib/ocaml/baseline/caesar.ml b/lib/ocaml/baseline/caesar.ml new file mode 100644 index 00000000..356014d2 --- /dev/null +++ b/lib/ocaml/baseline/caesar.ml @@ -0,0 +1,14 @@ +let shift_char c k = + let n = Char.code c in + if n >= 97 && n <= 122 then + Char.chr (((n - 97 + k) mod 26 + 26) mod 26 + 97) + else c + +let encode s k = + String.init (String.length s) (fun i -> shift_char s.[i] k) +;; + +(* ROT13 round-trip: encode (encode "hello" 13) 13 = "hello". + Sum the codes of two chars to give a deterministic integer check. *) +let r = encode (encode "hello" 13) 13 in +Char.code r.[0] + Char.code r.[4] diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 86ceac8d..63f273b1 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -2,6 +2,7 @@ "anagrams.ml": 3, "bfs.ml": 6, "btree.ml": 39, + "caesar.ml": 215, "calc.ml": 13, "closures.ml": 315, "exception_handle.ml": 4, diff --git a/lib/ocaml/baseline/run.sh b/lib/ocaml/baseline/run.sh index c26fbdb0..4d1143d4 100755 --- a/lib/ocaml/baseline/run.sh +++ b/lib/ocaml/baseline/run.sh @@ -36,7 +36,7 @@ for f in lib/ocaml/baseline/*.ml; do (eval "(ocaml-run-program (file-read \"$f\"))") EOF - output=$(timeout 240 "$SX_SERVER" < "$TMP" 2>/dev/null) + output=$(timeout 480 "$SX_SERVER" < "$TMP" 2>/dev/null) rm -f "$TMP" result=$(echo "$output" | awk ' diff --git a/lib/ocaml/parser.sx b/lib/ocaml/parser.sx index 6b0f4abd..23f5f13e 100644 --- a/lib/ocaml/parser.sx +++ b/lib/ocaml/parser.sx @@ -1603,7 +1603,22 @@ (cond ((= (ocaml-tok-type (peek-tok)) "eof") nil) ((at-kw? "let") - (begin (append! decls (parse-decl-let)) (loop))) + (cond + ;; `let r = expr in body` at the top level is an + ;; expression-let, not a decl. Detect by scanning + ;; for a matching `in` at this depth — has-matching-in? + ;; walks the same boundaries as the decl scanner. + ((has-matching-in?) + (let ((expr-start (cur-pos))) + (begin + (skip-let-rhs-boundary!) + (let ((expr-src (slice src expr-start (cur-pos)))) + (let ((expr (ocaml-parse expr-src))) + (begin + (append! decls (list :expr expr)) + (loop))))))) + (else + (begin (append! decls (parse-decl-let)) (loop))))) ((at-kw? "module") (begin (append! decls (parse-decl-module)) (loop))) ((at-kw? "open") diff --git a/lib/ocaml/runtime.sx b/lib/ocaml/runtime.sx index d1ceec8f..9958cf38 100644 --- a/lib/ocaml/runtime.sx +++ b/lib/ocaml/runtime.sx @@ -360,6 +360,20 @@ let split_on_char c s = _string_split_on_char c s let replace_all s a b = _string_replace s a b let index_of s sub = _string_index_of s sub + let make n c = + let rec aux i acc = if i = 0 then acc else aux (i - 1) (acc ^ c) in + aux n \"\" + let init n f = + let rec aux i acc = + if i >= n then acc else aux (i + 1) (acc ^ f i) + in + aux 0 \"\" + let map f s = + let rec aux i acc = + if i >= _string_length s then acc + else aux (i + 1) (acc ^ f (_string_get s i)) + in + aux 0 \"\" end ;; module Bytes = struct diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index ab531d58..a0e456a9 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,17 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — caesar.ml baseline (ROT13 with String.init + + s.[i] + Char.code/chr). Side-quests: + (1) top-level `let r = expr in body` is now treated as an expression + decl when has-matching-in? returns true at the dispatcher. Slices via + skip-let-rhs-boundary which already opens depth on a leading let + with matching in; + (2) added String.make / String.init / String.map to runtime; + (3) bumped lib/ocaml/baseline/run.sh per-program timeout 240→480s + for headroom on contended hosts. + Test = `Char.code r.[0] + Char.code r.[4]` after ROT13 round-trip on + "hello" → 215 (h+o). - 2026-05-08 Phase 4 — `s.[i]` string indexing syntax (+3 tests, 484 total). parse-atom-postfix now handles `.[expr]` after `.`, emitting `(:string-get S I)`; eval reduces to host `(nth s i)`. From bd2cd8aad18a907497e64f2c38e7b2b7cb7fe637 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 00:23:58 +0000 Subject: [PATCH 086/298] ocaml: phase 5.1 levenshtein.ml baseline (no-memo edit distance, sum=11) Recursive Levenshtein edit distance with no memoization (the test strings are short enough for the exponential-without-memo version to fit in <2 minutes on contended hosts). Sums distances for five short pairs: ('abc','abx') + ('ab','ba') + ('abc','axyc') + ('','abcd') + ('ab','') = 1 + 2 + 2 + 4 + 2 = 11 Exercises: * curried four-arg recursion * s.[i] equality test (char comparison) * min nested twice for the three-way recurrence * mixed empty-string base cases --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/levenshtein.ml | 18 ++++++++++++++++++ plans/ocaml-on-sx.md | 6 ++++++ 3 files changed, 25 insertions(+) create mode 100644 lib/ocaml/baseline/levenshtein.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 63f273b1..37aab475 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -11,6 +11,7 @@ "fizzbuzz.ml": 57, "list_ops.ml": 30, "lambda_calc.ml": 7, + "levenshtein.ml": 11, "memo_fib.ml": 75025, "module_use.ml": 3, "mutable_record.ml": 10, diff --git a/lib/ocaml/baseline/levenshtein.ml b/lib/ocaml/baseline/levenshtein.ml new file mode 100644 index 00000000..1ccf55a0 --- /dev/null +++ b/lib/ocaml/baseline/levenshtein.ml @@ -0,0 +1,18 @@ +let rec lev s1 s2 i j = + if i = 0 then j + else if j = 0 then i + else if s1.[i - 1] = s2.[j - 1] then + lev s1 s2 (i - 1) (j - 1) + else + 1 + min (lev s1 s2 (i - 1) j) + (min (lev s1 s2 i (j - 1)) (lev s1 s2 (i - 1) (j - 1))) + +let dist s1 s2 = lev s1 s2 (String.length s1) (String.length s2) + +;; + +dist "abc" "abx" ++ dist "ab" "ba" ++ dist "abc" "axyc" ++ dist "" "abcd" ++ dist "ab" "" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index a0e456a9..42479586 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,12 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — levenshtein.ml baseline (recursive edit + distance, no memo). Sums distances for five short pairs: + ("abc","abx")=1 + ("ab","ba")=2 + ("abc","axyc")=2 + + ("","abcd")=4 + ("ab","")=2 = 11. Exercises curried four-arg + recursion + s.[i] equality test + min nested twice + mixed empty + string base cases. - 2026-05-09 Phase 5.1 — caesar.ml baseline (ROT13 with String.init + s.[i] + Char.code/chr). Side-quests: (1) top-level `let r = expr in body` is now treated as an expression From 14b52cfaa7d8c6d1468fcf568809017b187b141c Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 00:32:35 +0000 Subject: [PATCH 087/298] ocaml: phase 4 'assert EXPR' (+3 tests, 487 total) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tokenizer already classified 'assert' as a keyword; this commit wires it through: parser : parse-prefix dispatches like 'not' — advance, recur, wrap as (:assert EXPR). eval : evaluate operand; nil on truthy, host-error 'Assert_failure' on false. Caught cleanly by existing try/with. assert true; 42 = 42 let x = 5 in assert (x = 5); x + 1 = 6 try (assert false; 0) with _ -> 99 = 99 --- lib/ocaml/eval.sx | 5 +++++ lib/ocaml/parser.sx | 2 ++ lib/ocaml/test.sh | 13 +++++++++++++ plans/ocaml-on-sx.md | 6 ++++++ 4 files changed, 26 insertions(+) diff --git a/lib/ocaml/eval.sx b/lib/ocaml/eval.sx index 83111b9a..76ed38a7 100644 --- a/lib/ocaml/eval.sx +++ b/lib/ocaml/eval.sx @@ -430,6 +430,11 @@ (else (error (str "ocaml-eval: unbound variable " name)))))) ((= tag "neg") (- 0 (ocaml-eval (nth ast 1) env))) ((= tag "not") (not (ocaml-eval (nth ast 1) env))) + ((= tag "assert") + (let ((v (ocaml-eval (nth ast 1) env))) + (cond + ((= v false) (error "Assert_failure")) + (else nil)))) ((= tag "deref") (let ((cell (ocaml-eval (nth ast 1) env))) (nth cell 0))) diff --git a/lib/ocaml/parser.sx b/lib/ocaml/parser.sx index 23f5f13e..806d1963 100644 --- a/lib/ocaml/parser.sx +++ b/lib/ocaml/parser.sx @@ -622,6 +622,8 @@ (begin (advance-tok!) (list :deref (parse-prefix)))) ((at-kw? "not") (begin (advance-tok!) (list :not (parse-prefix)))) + ((at-kw? "assert") + (begin (advance-tok!) (list :assert (parse-prefix)))) (else (parse-app))))) (set! parse-binop-rhs diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 47fac6a2..8382b86d 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1202,6 +1202,14 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 4922) (eval "(ocaml-run \"let s = \\\"hi\\\" in let n = ref 0 in for i = 0 to String.length s - 1 do n := !n + Char.code s.[i] done; !n\")") +;; ── assert ──────────────────────────────────────────────────── +(epoch 4930) +(eval "(ocaml-run \"assert true; 42\")") +(epoch 4931) +(eval "(ocaml-run \"let x = 5 in assert (x = 5); x + 1\")") +(epoch 4932) +(eval "(ocaml-run \"try (assert false; 0) with _ -> 99\")") + EPOCHS OUTPUT=$(timeout 360 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1906,6 +1914,11 @@ check 4920 "s.[0] hello" '"h"' check 4921 "Char.code s.[2] abc" '99' check 4922 "for i s.[i] sum hi" '209' +# ── assert ─────────────────────────────────────────────────────── +check 4930 "assert true; 42" '42' +check 4931 "assert (x = 5); x + 1" '6' +check 4932 "try (assert false; ...) with" '99' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 42479586..5d124532 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,12 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 4 — `assert EXPR` (+3 tests, 487 total). Tokenizer + already classified `assert` as a keyword; parse-prefix now handles + it like `not` (advance, recur, wrap). Eval evaluates the operand and + returns nil on truthy, raises `Assert_failure` on false (host-side + error so existing try/with handles it). `try (assert false; 0) with + _ -> 99` → 99. - 2026-05-09 Phase 5.1 — levenshtein.ml baseline (recursive edit distance, no memo). Sums distances for five short pairs: ("abc","abx")=1 + ("ab","ba")=2 + ("abc","axyc")=2 + From 1b38f89055a3382c61b2706eee45d1fb8e063c79 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 00:42:35 +0000 Subject: [PATCH 088/298] ocaml: phase 6 Printf.sprintf %d/%s/%f/%c/%b/%% + global string_of_* (+5 tests, 492 total) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the stub sprintf in runtime.sx with a real implementation: walk fmt char-by-char accumulating a prefix; on recognised %X return a one-arg fn that formats the arg and recurses on the rest of fmt. The function self-curries to the spec count — there's no separate arity machinery, just a closure chain. Specs: %d (int), %s (string), %f (float), %c (char/string in our model), %b (bool), %% (literal). Unknown specs pass through. Same expression returns a string (no specs) or a function (>=1 spec) — OCaml proper would reject this; works fine in OCaml-on-SX's dynamic runtime. Also adds top-level aliases: string_of_int = _string_of_int string_of_float = _string_of_float string_of_bool = if b then "true" else "false" int_of_string = _int_of_string Printf.sprintf "x=%d" 42 = "x=42" Printf.sprintf "%s = %d" "answer" 42 = "answer = 42" Printf.sprintf "%d%%" 50 = "50%" --- lib/ocaml/runtime.sx | 39 ++++++++++++++++++++++++++++++++++++--- lib/ocaml/test.sh | 19 +++++++++++++++++++ plans/ocaml-on-sx.md | 12 ++++++++++++ 3 files changed, 67 insertions(+), 3 deletions(-) diff --git a/lib/ocaml/runtime.sx b/lib/ocaml/runtime.sx index 9958cf38..b9fa6373 100644 --- a/lib/ocaml/runtime.sx +++ b/lib/ocaml/runtime.sx @@ -424,8 +424,35 @@ end ;; module Printf = struct - let sprintf fmt = fmt - let printf fmt = print_string fmt + (* sprintf walks fmt, accumulating prefix. When it sees a %X + spec, it returns a function of one arg that substitutes the + arg and recurses on the rest of fmt. With no specs, returns + the bare format string. Specs supported: %d %s %f %c %b + (and %% as a literal). Unknown specs are passed through. *) + let sprintf fmt = + let n = _string_length fmt in + let rec walk pos prefix = + if pos >= n then prefix + else if pos + 1 < n && _string_get fmt pos = \"%\" then + let spec = _string_get fmt (pos + 1) in + if spec = \"%\" then walk (pos + 2) (prefix ^ \"%\") + else if spec = \"d\" || spec = \"s\" || spec = \"f\" + || spec = \"c\" || spec = \"b\" then + (fun arg -> + let s = + if spec = \"d\" then _string_of_int arg + else if spec = \"f\" then _string_of_float arg + else if spec = \"b\" then + (if arg then \"true\" else \"false\") + else arg + in + walk (pos + 2) (prefix ^ s)) + else walk (pos + 1) (prefix ^ _string_get fmt pos) + else walk (pos + 1) (prefix ^ _string_get fmt pos) + in + walk 0 \"\" + + let printf fmt = sprintf fmt end ;; module Stack = struct @@ -643,7 +670,13 @@ | [] -> [] | h :: t -> if mem h b then h :: inter t b else inter t b end - end") + end ;; + + let string_of_int n = _string_of_int n + let string_of_float f = _string_of_float f + let string_of_bool b = if b then \"true\" else \"false\" + let int_of_string s = _int_of_string s + ") (define ocaml-stdlib-loaded false) (define ocaml-stdlib-env nil) diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 8382b86d..68a5fd2c 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1210,6 +1210,18 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 4932) (eval "(ocaml-run \"try (assert false; 0) with _ -> 99\")") +;; ── Printf.sprintf + global string_of_* ────────────────────── +(epoch 4940) +(eval "(ocaml-run \"Printf.sprintf \\\"hello\\\"\")") +(epoch 4941) +(eval "(ocaml-run \"Printf.sprintf \\\"x=%d\\\" 42\")") +(epoch 4942) +(eval "(ocaml-run \"Printf.sprintf \\\"%s = %d\\\" \\\"answer\\\" 42\")") +(epoch 4943) +(eval "(ocaml-run \"Printf.sprintf \\\"%d%%\\\" 50\")") +(epoch 4944) +(eval "(ocaml-run \"string_of_int 7 ^ \\\"-\\\" ^ string_of_bool true\")") + EPOCHS OUTPUT=$(timeout 360 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1919,6 +1931,13 @@ check 4930 "assert true; 42" '42' check 4931 "assert (x = 5); x + 1" '6' check 4932 "try (assert false; ...) with" '99' +# ── Printf.sprintf ─────────────────────────────────────────────── +check 4940 "sprintf no args" '"hello"' +check 4941 "sprintf one %d" '"x=42"' +check 4942 "sprintf %s = %d" '"answer = 42"' +check 4943 "sprintf %d%% literal percent" '"50%"' +check 4944 "string_of_int + string_of_b" '"7-true"' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 5d124532..bb65f08a 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,18 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 6 — Printf.sprintf with %d/%s/%f/%c/%b/%% (+4 + tests) and global `string_of_int`/`string_of_float`/`string_of_bool` + (+1 test). 492 total. sprintf walks fmt char-by-char accumulating + a prefix; on a recognised spec it returns a one-arg fn that formats + the arg and recurses on the rest of fmt — naturally curries to the + right arity since the spec count drives the chain. Dynamic typing + lets us return either a string (no specs) or a function (≥1 spec) + from the same expression, which OCaml proper would reject. + Examples: + Printf.sprintf "x=%d" 42 = "x=42" + Printf.sprintf "%s = %d" "answer" 42 = "answer = 42" + Printf.sprintf "%d%%" 50 = "50%" - 2026-05-09 Phase 4 — `assert EXPR` (+3 tests, 487 total). Tokenizer already classified `assert` as a keyword; parse-prefix now handles it like `not` (advance, recur, wrap). Eval evaluates the operand and From 207dfc60ad3b1c2346b7172edf8df678747ccaee Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 00:53:32 +0000 Subject: [PATCH 089/298] ocaml: phase 6 Hashtbl.iter / Hashtbl.fold (+2 tests, 494 total) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New host primitive _hashtbl_to_list returns the entries as a list of OCaml tuples — ('tuple' k v) form, matching the AST representation that the pattern-match VM (:ptuple) expects. Without that exact shape, '(k, v) :: rest' patterns fail to match. Hashtbl.iter / Hashtbl.fold in runtime walk that list with the user fn. This closes a long-standing gap: previously Hashtbl was opaque once values were written (we could only find_opt one key at a time). let t = Hashtbl.create 4 in Hashtbl.add t "a" 1; Hashtbl.add t "b" 2; Hashtbl.add t "c" 3; Hashtbl.fold (fun _ v acc -> acc + v) t 0 = 6 --- lib/ocaml/eval.sx | 18 +++++++++++++++++- lib/ocaml/runtime.sx | 18 ++++++++++++++++++ lib/ocaml/test.sh | 10 ++++++++++ plans/ocaml-on-sx.md | 6 ++++++ 4 files changed, 51 insertions(+), 1 deletion(-) diff --git a/lib/ocaml/eval.sx b/lib/ocaml/eval.sx index 76ed38a7..c88b73ff 100644 --- a/lib/ocaml/eval.sx +++ b/lib/ocaml/eval.sx @@ -115,7 +115,23 @@ (list "_hashtbl_mem" (fn (t) (fn (k) (has-key? (nth t 0) (str k))))) (list "_hashtbl_length" - (fn (t) (len (keys (nth t 0)))))))) + (fn (t) (len (keys (nth t 0))))) + ;; _hashtbl_to_list: returns [(k, v); ...] as a list of pairs. + ;; Keys are returned as the stringified form used internally. + (list "_hashtbl_to_list" + (fn (t) + (let ((d (nth t 0)) (out (list))) + (begin + (define ks (keys d)) + (define loop + (fn (xs) + (when (not (= xs (list))) + (begin + (append! out + (list "tuple" (first xs) (get d (first xs)))) + (loop (rest xs)))))) + (loop ks) + out))))))) (define ocaml-env-lookup (fn (env name) diff --git a/lib/ocaml/runtime.sx b/lib/ocaml/runtime.sx index b9fa6373..3b269af4 100644 --- a/lib/ocaml/runtime.sx +++ b/lib/ocaml/runtime.sx @@ -530,6 +530,24 @@ | Some v -> v let mem t k = _hashtbl_mem t k let length t = _hashtbl_length t + + (* iter / fold over (k, v) pairs. Keys come back as their string + representation since the host coerces all keys via `str`. *) + let iter f t = + let rec go xs = + match xs with + | [] -> () + | (k, v) :: rest -> f k v; go rest + in + go (_hashtbl_to_list t) + + let fold f t acc = + let rec go xs a = + match xs with + | [] -> a + | (k, v) :: rest -> go rest (f k v a) + in + go (_hashtbl_to_list t) acc end ;; module Map = struct diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 68a5fd2c..cea5865b 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1222,6 +1222,12 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 4944) (eval "(ocaml-run \"string_of_int 7 ^ \\\"-\\\" ^ string_of_bool true\")") +;; ── Hashtbl.iter / Hashtbl.fold ───────────────────────────── +(epoch 4950) +(eval "(ocaml-run \"let t = Hashtbl.create 4 in Hashtbl.add t \\\"a\\\" 1; Hashtbl.add t \\\"b\\\" 2; Hashtbl.add t \\\"c\\\" 3; Hashtbl.fold (fun _ v acc -> acc + v) t 0\")") +(epoch 4951) +(eval "(ocaml-run \"let t = Hashtbl.create 4 in Hashtbl.add t \\\"x\\\" 10; Hashtbl.add t \\\"y\\\" 20; let total = ref 0 in Hashtbl.iter (fun _ v -> total := !total + v) t; !total\")") + EPOCHS OUTPUT=$(timeout 360 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1938,6 +1944,10 @@ check 4942 "sprintf %s = %d" '"answer = 42"' check 4943 "sprintf %d%% literal percent" '"50%"' check 4944 "string_of_int + string_of_b" '"7-true"' +# ── Hashtbl.iter / Hashtbl.fold ───────────────────────────────── +check 4950 "Hashtbl.fold sum 1+2+3" '6' +check 4951 "Hashtbl.iter ref accum 10+20" '30' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index bb65f08a..ab95d4e4 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,12 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 6 — Hashtbl.iter / Hashtbl.fold (+2 tests, 494 + total). New host primitive `_hashtbl_to_list` returns the entries + as a list of OCaml tuples (`("tuple" k v)` form, matching the AST + representation that pattern matching expects). Hashtbl.iter / fold + in runtime walk that list with the user fn. Closes a long-standing + gap: previously Hashtbl was opaque after writing to it. - 2026-05-09 Phase 6 — Printf.sprintf with %d/%s/%f/%c/%b/%% (+4 tests) and global `string_of_int`/`string_of_float`/`string_of_bool` (+1 test). 492 total. sprintf walks fmt char-by-char accumulating From 9907c1c58ca10a9e95f424488451b494b5c12009 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 01:03:40 +0000 Subject: [PATCH 090/298] ocaml: phase 4 'lazy EXPR' + Lazy.force (+2 tests, 496 total) Tokenizer already had 'lazy' as a keyword. This commit wires it through: parser : parse-prefix emits (:lazy EXPR), like the existing 'assert' handler. eval : creates a one-element cell with state ('Thunk' expr env). host : _lazy_force flips the cell to ('Forced' v) on first call and returns the cached value thereafter. runtime : module Lazy = struct let force lz = _lazy_force lz end. Memoisation confirmed by tracking a side-effect counter through two forces of the same lazy: let counter = ref 0 in let lz = lazy (counter := !counter + 1; 42) in let a = Lazy.force lz in let b = Lazy.force lz in (a + b) * 100 + !counter = 8401 (= 84*100 + 1) --- lib/ocaml/eval.sx | 19 +++++++++++++++++++ lib/ocaml/parser.sx | 2 ++ lib/ocaml/runtime.sx | 4 ++++ lib/ocaml/test.sh | 10 ++++++++++ plans/ocaml-on-sx.md | 7 +++++++ 5 files changed, 42 insertions(+) diff --git a/lib/ocaml/eval.sx b/lib/ocaml/eval.sx index c88b73ff..2171a545 100644 --- a/lib/ocaml/eval.sx +++ b/lib/ocaml/eval.sx @@ -116,6 +116,20 @@ (fn (t) (fn (k) (has-key? (nth t 0) (str k))))) (list "_hashtbl_length" (fn (t) (len (keys (nth t 0))))) + ;; _lazy_force: evaluate the thunk on first force, cache result. + ;; cell: one-elt list whose value is ("Thunk" expr env) or + ;; ("Forced" v). + (list "_lazy_force" + (fn (cell) + (let ((state (nth cell 0))) + (cond + ((= (first state) "Forced") (nth state 1)) + (else + (let ((expr (nth state 1)) (env (nth state 2))) + (let ((v (ocaml-eval expr env))) + (begin + (set-nth! cell 0 (list "Forced" v)) + v)))))))) ;; _hashtbl_to_list: returns [(k, v); ...] as a list of pairs. ;; Keys are returned as the stringified form used internally. (list "_hashtbl_to_list" @@ -451,6 +465,11 @@ (cond ((= v false) (error "Assert_failure")) (else nil)))) + ((= tag "lazy") + ;; (:lazy EXPR) — create a one-element cell containing + ;; ("Thunk" EXPR env); _lazy_force evaluates and caches. + (let ((expr (nth ast 1))) + (list (list "Thunk" expr env)))) ((= tag "deref") (let ((cell (ocaml-eval (nth ast 1) env))) (nth cell 0))) diff --git a/lib/ocaml/parser.sx b/lib/ocaml/parser.sx index 806d1963..23007963 100644 --- a/lib/ocaml/parser.sx +++ b/lib/ocaml/parser.sx @@ -624,6 +624,8 @@ (begin (advance-tok!) (list :not (parse-prefix)))) ((at-kw? "assert") (begin (advance-tok!) (list :assert (parse-prefix)))) + ((at-kw? "lazy") + (begin (advance-tok!) (list :lazy (parse-prefix)))) (else (parse-app))))) (set! parse-binop-rhs diff --git a/lib/ocaml/runtime.sx b/lib/ocaml/runtime.sx index 3b269af4..02bc9597 100644 --- a/lib/ocaml/runtime.sx +++ b/lib/ocaml/runtime.sx @@ -455,6 +455,10 @@ let printf fmt = sprintf fmt end ;; + module Lazy = struct + let force lz = _lazy_force lz + end ;; + module Stack = struct let create () = ref [] let push x s = s := x :: !s diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index cea5865b..4092275f 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1228,6 +1228,12 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 4951) (eval "(ocaml-run \"let t = Hashtbl.create 4 in Hashtbl.add t \\\"x\\\" 10; Hashtbl.add t \\\"y\\\" 20; let total = ref 0 in Hashtbl.iter (fun _ v -> total := !total + v) t; !total\")") +;; ── lazy / Lazy.force ───────────────────────────────────────── +(epoch 4960) +(eval "(ocaml-run \"let x = lazy (1 + 2) in Lazy.force x\")") +(epoch 4961) +(eval "(ocaml-run \"let counter = ref 0 in let lz = lazy (counter := !counter + 1; 42) in let a = Lazy.force lz in let b = Lazy.force lz in (a + b) * 100 + !counter\")") + EPOCHS OUTPUT=$(timeout 360 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1948,6 +1954,10 @@ check 4944 "string_of_int + string_of_b" '"7-true"' check 4950 "Hashtbl.fold sum 1+2+3" '6' check 4951 "Hashtbl.iter ref accum 10+20" '30' +# ── lazy / Lazy.force ───────────────────────────────────────── +check 4960 "lazy 1+2 force" '3' +check 4961 "lazy memoization counter=1" '8401' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index ab95d4e4..66de51c2 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 4 — `lazy EXPR` + `Lazy.force` (+2 tests, 496 + total). Tokenizer already had `lazy` as a keyword. parse-prefix now + emits `(:lazy EXPR)`; eval creates a one-element cell with state + `("Thunk" expr env)`. Host primitive `_lazy_force` flips the cell to + `("Forced" v)` on first call and returns the cached value on + subsequent calls. Memoization confirmed by tracking a side-effect + counter through two forces (counter increments only once). - 2026-05-09 Phase 6 — Hashtbl.iter / Hashtbl.fold (+2 tests, 494 total). New host primitive `_hashtbl_to_list` returns the entries as a list of OCaml tuples (`("tuple" k v)` form, matching the AST From 34d518d55583b3dc549deecc9ada6cf5951836c8 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 01:11:53 +0000 Subject: [PATCH 091/298] ocaml: phase 5.1 frequency.ml baseline + Format module alias (+2 tests, 498 total) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit frequency.ml exercises the recently-added Hashtbl.iter / fold + Hashtbl.find_opt + s.[i] indexing + for-loop together: build a char-count table for 'abracadabra' then take the max via Hashtbl.fold. Expected = 5 (a x 5). Total 25 baseline programs. Format module added as a thin alias of Printf — sprintf, printf, and asprintf all delegate to Printf.sprintf. The dynamic runtime doesn't distinguish boxes/breaks, so format strings work the same as in Printf and most Format-using OCaml programs now compile. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/frequency.ml | 18 ++++++++++++++++++ lib/ocaml/runtime.sx | 9 +++++++++ lib/ocaml/test.sh | 10 ++++++++++ plans/ocaml-on-sx.md | 6 ++++++ 5 files changed, 44 insertions(+) create mode 100644 lib/ocaml/baseline/frequency.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 37aab475..fe38a370 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -8,6 +8,7 @@ "exception_handle.ml": 4, "expr_eval.ml": 16, "factorial.ml": 3628800, + "frequency.ml": 5, "fizzbuzz.ml": 57, "list_ops.ml": 30, "lambda_calc.ml": 7, diff --git a/lib/ocaml/baseline/frequency.ml b/lib/ocaml/baseline/frequency.ml new file mode 100644 index 00000000..3c6d22ae --- /dev/null +++ b/lib/ocaml/baseline/frequency.ml @@ -0,0 +1,18 @@ +let count_chars s = + let t = Hashtbl.create 8 in + for i = 0 to String.length s - 1 do + let c = s.[i] in + let n = match Hashtbl.find_opt t c with + | Some v -> v + 1 + | None -> 1 + in + Hashtbl.replace t c n + done; + t + +let max_count t = + Hashtbl.fold (fun _ v acc -> if v > acc then v else acc) t 0 + +;; + +max_count (count_chars "abracadabra") diff --git a/lib/ocaml/runtime.sx b/lib/ocaml/runtime.sx index 02bc9597..cd5eea0f 100644 --- a/lib/ocaml/runtime.sx +++ b/lib/ocaml/runtime.sx @@ -455,6 +455,15 @@ let printf fmt = sprintf fmt end ;; + module Format = struct + (* Thin alias of Printf for pretty-printing parity. The dynamic + runtime doesn't distinguish boxes/breaks — fmt strings work + the same as in Printf. *) + let sprintf fmt = Printf.sprintf fmt + let printf fmt = Printf.sprintf fmt + let asprintf fmt = Printf.sprintf fmt + end ;; + module Lazy = struct let force lz = _lazy_force lz end ;; diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 4092275f..326fa9f1 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1234,6 +1234,12 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 4961) (eval "(ocaml-run \"let counter = ref 0 in let lz = lazy (counter := !counter + 1; 42) in let a = Lazy.force lz in let b = Lazy.force lz in (a + b) * 100 + !counter\")") +;; ── Format alias of Printf ──────────────────────────────────── +(epoch 4970) +(eval "(ocaml-run \"Format.sprintf \\\"%d\\\" 99\")") +(epoch 4971) +(eval "(ocaml-run \"Format.asprintf \\\"%s=%d\\\" \\\"n\\\" 7\")") + EPOCHS OUTPUT=$(timeout 360 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1958,6 +1964,10 @@ check 4951 "Hashtbl.iter ref accum 10+20" '30' check 4960 "lazy 1+2 force" '3' check 4961 "lazy memoization counter=1" '8401' +# ── Format alias ───────────────────────────────────────────────── +check 4970 "Format.sprintf %d" '"99"' +check 4971 "Format.asprintf %s=%d" '"n=7"' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 66de51c2..0e87915f 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,12 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — frequency.ml baseline + Format module alias + (+2 tests, 498 total). frequency.ml builds a Hashtbl of char→count + via `Hashtbl.find_opt` + `Hashtbl.replace` inside a `for` loop, then + uses `Hashtbl.fold` to find the maximum count. `count_chars + "abracadabra"` → max is 5 (a×5). Format module added as a thin + alias of Printf — sprintf / printf / asprintf all delegate. - 2026-05-09 Phase 4 — `lazy EXPR` + `Lazy.force` (+2 tests, 496 total). Tokenizer already had `lazy` as a keyword. parse-prefix now emits `(:lazy EXPR)`; eval creates a one-element cell with state From 8af36306254ae9dd0c45fc08f7f78afc3bfd9794 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 01:19:28 +0000 Subject: [PATCH 092/298] ocaml: phase 6 String.iter/iteri/fold_left/fold_right/to_seq/of_seq (+3 tests, 501 total) Six new String functions, all in OCaml syntax inside runtime.sx: iter : index-walk with side-effecting f iteri : iter with index fold_left : thread accumulator left-to-right fold_right: thread accumulator right-to-left to_seq : return a char list (lazy in real OCaml; eager here) of_seq : concat a char list back to a string Round-trip: String.of_seq (List.rev (String.to_seq "hello")) = "olleh" Note: real OCaml's Seq is lazy. We return a plain list because the existing stdlib already provides exhaustive list operations and we don't yet have lazy sequences. If a baseline needs Seq.unfold or similar, we'll graduate to a proper Seq module then. --- lib/ocaml/runtime.sx | 37 +++++++++++++++++++++++++++++++++++++ lib/ocaml/test.sh | 13 +++++++++++++ plans/ocaml-on-sx.md | 7 +++++++ 3 files changed, 57 insertions(+) diff --git a/lib/ocaml/runtime.sx b/lib/ocaml/runtime.sx index cd5eea0f..52d9d2ec 100644 --- a/lib/ocaml/runtime.sx +++ b/lib/ocaml/runtime.sx @@ -374,6 +374,43 @@ else aux (i + 1) (acc ^ f (_string_get s i)) in aux 0 \"\" + let iter f s = + let rec aux i = + if i >= _string_length s then () + else (f (_string_get s i); aux (i + 1)) + in + aux 0 + let iteri f s = + let rec aux i = + if i >= _string_length s then () + else (f i (_string_get s i); aux (i + 1)) + in + aux 0 + let fold_left f init s = + let rec aux i acc = + if i >= _string_length s then acc + else aux (i + 1) (f acc (_string_get s i)) + in + aux 0 init + let fold_right f s init = + let rec aux i acc = + if i < 0 then acc + else aux (i - 1) (f (_string_get s i) acc) + in + aux (_string_length s - 1) init + let to_seq s = + let rec aux i = + if i >= _string_length s then [] + else _string_get s i :: aux (i + 1) + in + aux 0 + let of_seq xs = + let rec aux ys acc = + match ys with + | [] -> acc + | h :: t -> aux t (acc ^ h) + in + aux xs \"\" end ;; module Bytes = struct diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 326fa9f1..f8c1467a 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1240,6 +1240,14 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 4971) (eval "(ocaml-run \"Format.asprintf \\\"%s=%d\\\" \\\"n\\\" 7\")") +;; ── String.iter / fold / seq ───────────────────────────────── +(epoch 4980) +(eval "(ocaml-run \"let n = ref 0 in String.iter (fun c -> n := !n + Char.code c) \\\"abc\\\"; !n\")") +(epoch 4981) +(eval "(ocaml-run \"String.fold_left (fun acc c -> acc + Char.code c) 0 \\\"hi\\\"\")") +(epoch 4982) +(eval "(ocaml-run \"String.of_seq (List.rev (String.to_seq \\\"hello\\\"))\")") + EPOCHS OUTPUT=$(timeout 360 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1968,6 +1976,11 @@ check 4961 "lazy memoization counter=1" '8401' check 4970 "Format.sprintf %d" '"99"' check 4971 "Format.asprintf %s=%d" '"n=7"' +# ── String.iter / fold_left / seq ─────────────────────────────── +check 4980 "String.iter sum codes abc" '294' +check 4981 "String.fold_left sum hi" '209' +check 4982 "String.of_seq (rev to_seq)" '"olleh"' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 0e87915f..b19573db 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 6 — String.iter / iteri / fold_left / fold_right / + to_seq / of_seq (+3 tests, 501 total). All implemented in OCaml + syntax inside the runtime stdlib; iter / iteri walk via index + + side-effecting `f`, fold_left / fold_right thread an accumulator, + to_seq returns a char list, of_seq concats a char list back to a + string. Round-trip: `String.of_seq (List.rev (String.to_seq + "hello"))` → "olleh". - 2026-05-09 Phase 5.1 — frequency.ml baseline + Format module alias (+2 tests, 498 total). frequency.ml builds a Hashtbl of char→count via `Hashtbl.find_opt` + `Hashtbl.replace` inside a `for` loop, then From a34cfe69dcec052b118a9cdb22dbbd05b0e35b19 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 01:29:02 +0000 Subject: [PATCH 093/298] ocaml: phase 6 List.sort_uniq + List.find_map (+2 tests, 503 total) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sort_uniq: Sort with the user comparator, then walk the sorted list dropping any element equal to its predecessor. Output is sorted and unique. List.sort_uniq compare [3;1;2;1;3;2;4] = [1;2;3;4] find_map: Walk until the user fn returns Some v; return that. If all None, return None. List.find_map (fun x -> if x > 5 then Some (x * 2) else None) [1;2;3;6;7] = Some 12 Both defined in OCaml syntax in runtime.sx — no host primitive needed since they're pure list traversals over existing operations. --- lib/ocaml/runtime.sx | 20 ++++++++++++++++++++ lib/ocaml/test.sh | 10 ++++++++++ plans/ocaml-on-sx.md | 5 +++++ 3 files changed, 35 insertions(+) diff --git a/lib/ocaml/runtime.sx b/lib/ocaml/runtime.sx index 52d9d2ec..58fea697 100644 --- a/lib/ocaml/runtime.sx +++ b/lib/ocaml/runtime.sx @@ -158,6 +158,26 @@ let stable_sort = sort + let rec sort_uniq cmp xs = + let sorted = sort cmp xs in + let rec dedup ys = + match ys with + | [] -> [] + | [x] -> [x] + | a :: b :: rest -> + if cmp a b = 0 then dedup (b :: rest) + else a :: dedup (b :: rest) + in + dedup sorted + + let rec find_map f xs = + match xs with + | [] -> None + | h :: t -> + match f h with + | Some v -> Some v + | None -> find_map f t + let rec combine xs ys = match xs with | [] -> (match ys with diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index f8c1467a..39414caf 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1248,6 +1248,12 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 4982) (eval "(ocaml-run \"String.of_seq (List.rev (String.to_seq \\\"hello\\\"))\")") +;; ── List.sort_uniq / List.find_map ─────────────────────────── +(epoch 4990) +(eval "(ocaml-run \"List.sort_uniq compare [3;1;2;1;3;2;4]\")") +(epoch 4991) +(eval "(ocaml-run \"List.find_map (fun x -> if x > 5 then Some (x * 2) else None) [1;2;3;6;7]\")") + EPOCHS OUTPUT=$(timeout 360 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1981,6 +1987,10 @@ check 4980 "String.iter sum codes abc" '294' check 4981 "String.fold_left sum hi" '209' check 4982 "String.of_seq (rev to_seq)" '"olleh"' +# ── List.sort_uniq / List.find_map ────────────────────────────── +check 4990 "sort_uniq dedupes & sorts" '(1 2 3 4)' +check 4991 "find_map first >5 doubled" '("Some" 12)' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index b19573db..51ed0288 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,11 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 6 — List.sort_uniq / List.find_map (+2 tests, 503 + total). sort_uniq sorts then dedups consecutive equals. find_map + walks until the user fn returns `Some v` and returns it (or `None` + on empty/all-None). Closes two of the most-asked-for list ops; both + defined in OCaml syntax in runtime.sx. - 2026-05-09 Phase 6 — String.iter / iteri / fold_left / fold_right / to_seq / of_seq (+3 tests, 501 total). All implemented in OCaml syntax inside the runtime stdlib; iter / iteri walk via index + From 19497c9fba91be0d7c140152bd9b227bbe85bf0d Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 01:38:09 +0000 Subject: [PATCH 094/298] ocaml: phase 4 polymorphic variants confirmation (+3 tests, 506 total) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tokenizer already classified backtick-uppercase as a ctor identical to a nominal one, but it had never been exercised by the suite. This commit adds three smoke tests confirming that nullary, n-ary, and list-of-polyvariant patterns all match: let x = polyvar(Foo) in match x with polyvar(Foo) -> 1 | polyvar(Bar) -> 2 let x = polyvar(Pair) (5, 7) in match x with polyvar(Pair) (a, b) -> a + b | _ -> 0 List.map (fun x -> match x with polyvar(On) -> 1 | polyvar(Off) -> 0) [polyvar(On); polyvar(Off); polyvar(On)] (In the actual SX, polyvar(X) is the literal backtick-X — backticks in this commit message are escaped to avoid shell interpretation.) Since OCaml-on-SX is dynamic, there's no structural row inference, but matching by tag works. --- lib/ocaml/test.sh | 13 +++++++++++++ plans/ocaml-on-sx.md | 7 +++++++ 2 files changed, 20 insertions(+) diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 39414caf..d3a1fe03 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1254,6 +1254,14 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 4991) (eval "(ocaml-run \"List.find_map (fun x -> if x > 5 then Some (x * 2) else None) [1;2;3;6;7]\")") +;; ── Polymorphic variants ────────────────────────────────────── +(epoch 5000) +(eval "(ocaml-run \"let x = `Foo in match x with `Foo -> 1 | `Bar -> 2\")") +(epoch 5001) +(eval "(ocaml-run \"let x = `Pair (5, 7) in match x with `Pair (a, b) -> a + b | _ -> 0\")") +(epoch 5002) +(eval "(ocaml-run \"List.map (fun x -> match x with `On -> 1 | `Off -> 0) [`On; `Off; `On]\")") + EPOCHS OUTPUT=$(timeout 360 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1991,6 +1999,11 @@ check 4982 "String.of_seq (rev to_seq)" '"olleh"' check 4990 "sort_uniq dedupes & sorts" '(1 2 3 4)' check 4991 "find_map first >5 doubled" '("Some" 12)' +# ── Polymorphic variants ───────────────────────────────────────── +check 5000 'match polyvar Foo / Bar' '1' +check 5001 'match polyvar Pair (a, b)' '12' +check 5002 'List.map polyvar On / Off' '(1 0 1)' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 51ed0288..d150efe9 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 4 — polymorphic variants confirmation (+3 tests, + 506 total). The tokenizer was already classifying `` `Tag `` as a + ctor identical to a nominal one, but it had never been exercised by + tests. Now verified that nullary, n-ary, and list-of-polyvariants + patterns all match: `` `Foo``, `` `Pair (5, 7)``, `[`On; `Off]`. + Effectively free since OCaml-on-SX is dynamic — there's no + structural row inference, but matching by tag works. - 2026-05-09 Phase 6 — List.sort_uniq / List.find_map (+2 tests, 503 total). sort_uniq sorts then dedups consecutive equals. find_map walks until the user fn returns `Some v` and returns it (or `None` From 5618dd1ef5ea651a641e46180914f92ca339ad02 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 01:47:27 +0000 Subject: [PATCH 095/298] ocaml: phase 5.1 csv.ml baseline (split + int_of_string + fold_left) Inline CSV-like text: a,1,extra b,2,extra c,3,extra d,4,extra Two-stage String.split_on_char: first on '\n' for rows, then on ',' for fields per row. List.fold_left accumulates int_of_string of the second field across rows. Result = 1+2+3+4 = 10. Exercises char escapes inside string literals ('\n'), nested String.split_on_char, List.fold_left with a non-trivial closure body, and int_of_string. 23 baseline programs total. --- lib/ocaml/baseline/csv.ml | 12 ++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 5 +++++ 3 files changed, 18 insertions(+) create mode 100644 lib/ocaml/baseline/csv.ml diff --git a/lib/ocaml/baseline/csv.ml b/lib/ocaml/baseline/csv.ml new file mode 100644 index 00000000..56a023f3 --- /dev/null +++ b/lib/ocaml/baseline/csv.ml @@ -0,0 +1,12 @@ +let sum_second_col text = + let lines = String.split_on_char '\n' text in + List.fold_left (fun acc line -> + let fields = String.split_on_char ',' line in + if List.length fields >= 2 then + acc + int_of_string (List.nth fields 1) + else acc + ) 0 lines + +;; + +sum_second_col "a,1,extra\nb,2,extra\nc,3,extra\nd,4,extra" diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index fe38a370..343c9c3b 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -5,6 +5,7 @@ "caesar.ml": 215, "calc.ml": 13, "closures.ml": 315, + "csv.ml": 10, "exception_handle.ml": 4, "expr_eval.ml": 16, "factorial.ml": 3628800, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index d150efe9..29cfc0cf 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,11 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — csv.ml baseline (split on '\n' then ',', + parse-int the second field, fold-left). Exercises char escapes + inside string literals, two-stage String.split_on_char, mixed + List.fold_left + int_of_string + List.nth. Sums column 2 of a + 4-row inline CSV → 1+2+3+4 = 10. 23 baseline programs total. - 2026-05-09 Phase 4 — polymorphic variants confirmation (+3 tests, 506 total). The tokenizer was already classifying `` `Tag `` as a ctor identical to a nominal one, but it had never been exercised by From 1ed3216ba6731534ffe7bd2b51b5141b4455d851 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 01:59:13 +0000 Subject: [PATCH 096/298] ocaml: phase 6 Array module + (op) operator sections (+6 tests, 512 total) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Array module (runtime.sx, OCaml syntax): Backed by a 'ref of list'. make/length/get/init build the cell; set rewrites the underlying list with one cell changed (O(n) but works for short arrays in baseline programs). Includes iter/iteri/map/mapi/fold_left/to_list/of_list/copy/blit/fill. (op) operator sections (parser.sx, parse-atom): When the token after '(' is a binop (any op with non-zero precedence in the binop table) and the next token is ')', emit (:fun ('a' 'b') (:op OP a b)) — i.e. (+) becomes fun a b -> a + b. Recognises every binop including 'mod', 'land', '^', '@', '::', etc. Lets us write: List.fold_left (+) 0 [1;2;3;4;5] = 15 let f = ( * ) in f 6 7 = 42 List.map ((-) 10) [1;2;3] = [9;8;7] let a = Array.make 5 7 in Array.set a 2 99; Array.fold_left (+) 0 a = 127 --- lib/ocaml/parser.sx | 17 +++++++++++++++++ lib/ocaml/runtime.sx | 43 +++++++++++++++++++++++++++++++++++++++++++ lib/ocaml/test.sh | 26 ++++++++++++++++++++++++++ plans/ocaml-on-sx.md | 10 ++++++++++ 4 files changed, 96 insertions(+) diff --git a/lib/ocaml/parser.sx b/lib/ocaml/parser.sx index 23007963..2d1757f9 100644 --- a/lib/ocaml/parser.sx +++ b/lib/ocaml/parser.sx @@ -419,6 +419,23 @@ (advance-tok!) (cond ((at-op? ")") (begin (advance-tok!) (list :unit))) + ;; (op) — operator section: build (fun a b -> a op b). + ;; Recognises ops with precedence > 0 (i.e. binops), + ;; followed immediately by `)`. + ((and (or (= (ocaml-tok-type (peek-tok)) "op") + (= (ocaml-tok-type (peek-tok)) "keyword")) + (not (= (ocaml-binop-prec + (ocaml-tok-value (peek-tok))) 0)) + (let ((t1 (nth tokens (+ idx 1)))) + (and (= (ocaml-tok-type t1) "op") + (= (ocaml-tok-value t1) ")")))) + (let ((opv (ocaml-tok-value (peek-tok)))) + (begin + (advance-tok!) + (advance-tok!) + (list :fun (list "a" "b") + (list :op opv (list :var "a") + (list :var "b")))))) (else (let ((e (parse-expr))) diff --git a/lib/ocaml/runtime.sx b/lib/ocaml/runtime.sx index 58fea697..546afabb 100644 --- a/lib/ocaml/runtime.sx +++ b/lib/ocaml/runtime.sx @@ -525,6 +525,49 @@ let force lz = _lazy_force lz end ;; + module Array = struct + (* Backed by a ref-of-list. Mutating set! replaces the underlying + list with a new one in which one cell is changed. O(n) set, but + good enough for short arrays in baseline programs. *) + let make n v = + let rec build i acc = + if i = 0 then acc else build (i - 1) (v :: acc) + in + ref (build n []) + + let length a = List.length !a + let get a i = List.nth !a i + + let set a i v = + let rec replace lst k = + match lst with + | [] -> [] + | h :: t -> if k = 0 then v :: t else h :: replace t (k - 1) + in + a := replace !a i + + let init n f = + let rec build i acc = + if i = 0 then acc else build (i - 1) (f (i - 1) :: acc) + in + ref (build n []) + + let iter f a = List.iter f !a + let iteri f a = List.iteri f !a + let map f a = ref (List.map f !a) + let mapi f a = ref (List.mapi f !a) + let fold_left f init a = List.fold_left f init !a + let to_list a = !a + let of_list xs = ref xs + let copy a = ref !a + let blit src si dst di n = + for k = 0 to n - 1 do + set dst (di + k) (get src (si + k)) + done + let fill a pos n v = + for k = 0 to n - 1 do set a (pos + k) v done + end ;; + module Stack = struct let create () = ref [] let push x s = s := x :: !s diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index d3a1fe03..3cdd55a5 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1262,6 +1262,22 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 5002) (eval "(ocaml-run \"List.map (fun x -> match x with `On -> 1 | `Off -> 0) [`On; `Off; `On]\")") +;; ── Array module ───────────────────────────────────────────── +(epoch 5010) +(eval "(ocaml-run \"let a = Array.make 5 7 in Array.set a 2 99; Array.fold_left (+) 0 a\")") +(epoch 5011) +(eval "(ocaml-run \"let a = Array.init 4 (fun i -> i * i) in Array.fold_left (+) 0 a\")") +(epoch 5012) +(eval "(ocaml-run \"let a = Array.make 3 0 in for i = 0 to 2 do Array.set a i (i + 1) done; Array.length a + Array.get a 0 + Array.get a 1 + Array.get a 2\")") + +;; ── (op) operator sections ─────────────────────────────────── +(epoch 5020) +(eval "(ocaml-run \"List.fold_left (+) 0 [1;2;3;4;5]\")") +(epoch 5021) +(eval "(ocaml-run \"let f = ( * ) in f 6 7\")") +(epoch 5022) +(eval "(ocaml-run \"List.map ((-) 10) [1;2;3]\")") + EPOCHS OUTPUT=$(timeout 360 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -2004,6 +2020,16 @@ check 5000 'match polyvar Foo / Bar' '1' check 5001 'match polyvar Pair (a, b)' '12' check 5002 'List.map polyvar On / Off' '(1 0 1)' +# ── Array module ──────────────────────────────────────────────── +check 5010 "Array.make + set 2 99" '127' +check 5011 "Array.init 4 i*i" '14' +check 5012 "Array.make 3 + for + length" '9' + +# ── (op) operator sections ────────────────────────────────────── +check 5020 "fold_left (+) sum" '15' +check 5021 "let f = (*) in f 6 7" '42' +check 5022 "List.map ((-) 10) [1;2;3]" '(9 8 7)' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 29cfc0cf..52345331 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,16 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 6 — Array module (ref-of-list backing) + (op) + operator sections (+6 tests, 512 total). Array implements + make/length/get/set/init/iter/iteri/map/mapi/fold_left/to_list/ + of_list/copy/blit/fill in OCaml syntax in runtime.sx; backing is a + `ref of list` so set is O(n) but mutation works. (op) sections in + parse-atom: when the token after `(` is a binop and the next is + `)`, emit `(:fun ("a" "b") (:op OP a b))` — `(+)` becomes `fun a b + -> a + b`. Recognises any binop in the precedence table including + `mod`, `land`, `^`, `@`, `::`, etc. Lets us write `List.fold_left + (+) 0 xs` and `((-) 10)` partial applications. - 2026-05-09 Phase 5.1 — csv.ml baseline (split on '\n' then ',', parse-int the second field, fold-left). Exercises char escapes inside string literals, two-stage String.split_on_char, mixed From 073588812a9d551ef06bcd7943d6a79371efab88 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 02:08:21 +0000 Subject: [PATCH 097/298] ocaml: phase 4 'arr.(i)' and 'arr.(i) <- v' array indexing (+3 tests, 515 total) parse-atom-postfix's '.()' branch now disambiguates between let-open and array-get based on whether the head is a module path (':con' or ':field' chain rooted in ':con'). Module paths still emit (:let-open M EXPR); everything else emits (:array-get ARR I). Eval handles :array-get by reading the cell's underlying list at index. The '<-' assignment handler now also accepts :array-get lhs and rewrites the cell with one position changed. Idiomatic OCaml array code now works: let a = Array.make 5 0 in for i = 0 to 4 do a.(i) <- i * i done; a.(3) + a.(4) = 25 let a = Array.init 4 (fun i -> i + 1) in a.(0) + a.(1) + a.(2) + a.(3) = 10 List.(length [1;2;3]) = 3 (* unchanged: List is a module *) --- lib/ocaml/eval.sx | 21 +++++++++++++++++++++ lib/ocaml/parser.sx | 15 ++++++++++++++- lib/ocaml/test.sh | 13 +++++++++++++ plans/ocaml-on-sx.md | 12 ++++++++++++ 4 files changed, 60 insertions(+), 1 deletion(-) diff --git a/lib/ocaml/eval.sx b/lib/ocaml/eval.sx index 2171a545..0f2ee6e5 100644 --- a/lib/ocaml/eval.sx +++ b/lib/ocaml/eval.sx @@ -490,6 +490,22 @@ (let ((target (ocaml-eval (nth lhs-ast 1) env)) (fname (nth lhs-ast 2))) (begin (dict-set! target fname new-val) nil))) + ((= (ocaml-tag-of lhs-ast) "array-get") + ;; (:array-get ARR I) <- v : rewrite the underlying + ;; list in the ref cell with one cell changed. + (let ((arr (ocaml-eval (nth lhs-ast 1) env)) + (i (ocaml-eval (nth lhs-ast 2) env))) + (begin + (define replace + (fn (lst k) + (cond + ((= lst (list)) (list)) + ((= k 0) (cons new-val (rest lst))) + (else + (cons (first lst) + (replace (rest lst) (- k 1))))))) + (set-nth! arr 0 (replace (nth arr 0) i)) + nil))) (else (error (str "ocaml-eval: <- expects a field-access lhs, got " @@ -602,6 +618,11 @@ (let ((s (ocaml-eval (nth ast 1) env)) (i (ocaml-eval (nth ast 2) env))) (nth s i))) + ((= tag "array-get") + ;; (:array-get ARR I) — Array.get on a ref-of-list. + (let ((arr (ocaml-eval (nth ast 1) env)) + (i (ocaml-eval (nth ast 2) env))) + (nth (nth arr 0) i))) ((= tag "for") ;; (:for NAME LO HI DIR BODY) — DIR is "ascend" or "descend". (let ((name (nth ast 1)) diff --git a/lib/ocaml/parser.sx b/lib/ocaml/parser.sx index 2d1757f9..f84e6263 100644 --- a/lib/ocaml/parser.sx +++ b/lib/ocaml/parser.sx @@ -574,6 +574,16 @@ ;; `(:field (:field M "x") "y")`. (let ((head (parse-atom))) (begin + ;; Module-path detector: head is :con, or :field chain + ;; whose innermost subject is :con. Used to choose between + ;; (:let-open M EXPR) and (:array-get ARR I) after `.(`. + (define is-module-path? + (fn (h) + (cond + ((not (list? h)) false) + ((= (first h) "con") true) + ((= (first h) "field") (is-module-path? (nth h 1))) + (else false)))) (define loop (fn () (when (at-op? ".") @@ -586,7 +596,10 @@ (let ((inner (parse-expr))) (begin (consume! "op" ")") - (set! head (list :let-open head inner)) + (set! head + (if (is-module-path? head) + (list :let-open head inner) + (list :array-get head inner))) (loop))))) ((at-op? "[") (begin diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 3cdd55a5..52a026eb 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1278,6 +1278,14 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 5022) (eval "(ocaml-run \"List.map ((-) 10) [1;2;3]\")") +;; ── arr.(i) and arr.(i) <- v ──────────────────────────────── +(epoch 5030) +(eval "(ocaml-run \"let a = Array.make 5 7 in a.(2) <- 99; a.(2) + a.(0)\")") +(epoch 5031) +(eval "(ocaml-run \"let a = Array.init 4 (fun i -> i + 1) in a.(0) + a.(1) + a.(2) + a.(3)\")") +(epoch 5032) +(eval "(ocaml-run \"let a = Array.make 5 0 in for i = 0 to 4 do a.(i) <- i * i done; a.(3) + a.(4)\")") + EPOCHS OUTPUT=$(timeout 360 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -2030,6 +2038,11 @@ check 5020 "fold_left (+) sum" '15' check 5021 "let f = (*) in f 6 7" '42' check 5022 "List.map ((-) 10) [1;2;3]" '(9 8 7)' +# ── arr.(i) and arr.(i) <- v ──────────────────────────────────── +check 5030 "a.(2) <- 99; a.(2) + a.(0)" '106' +check 5031 "Array.init 4 + sum a.(0..3)" '10' +check 5032 "for + a.(i) <- i*i + sum" '25' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 52345331..3163d670 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,18 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 4 — `arr.(i)` and `arr.(i) <- v` array indexing + syntax (+3 tests, 515 total). parse-atom-postfix's `.(...)` branch + now disambiguates between let-open and array-get based on whether + the head is a module path (`:con` or a `:field` chain rooted in a + `:con`). Module paths still emit `(:let-open M EXPR)`; everything + else emits `(:array-get ARR I)`. Eval handles `:array-get` by + reading the cell's underlying list at index. The `<-` assignment + handler now also accepts `:array-get` lhs and rewrites the cell + with one position changed. Lets us write idiomatic OCaml array code: + let a = Array.make 5 0 in + for i = 0 to 4 do a.(i) <- i * i done; + a.(3) + a.(4) (* = 25 *) - 2026-05-09 Phase 6 — Array module (ref-of-list backing) + (op) operator sections (+6 tests, 512 total). Array implements make/length/get/set/init/iter/iteri/map/mapi/fold_left/to_list/ From a66b2622672cacd20571ff0c6fd0d525cff49805 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 02:16:18 +0000 Subject: [PATCH 098/298] ocaml: phase 5.1 sieve.ml baseline (Sieve of Eratosthenes) Counts primes <= 50, expected 15. Stresses the recently-added Array module + the new array-indexing syntax together with nested control flow: let sieve = Array.make (n + 1) true in sieve.(0) <- false; sieve.(1) <- false; for i = 2 to n do if sieve.(i) then begin let j = ref (i * i) in while !j <= n do sieve.(!j) <- false; j := !j + i done end done; ... Exercises: Array.make, arr.(i), arr.(i) <- v, nested for/while, begin..end blocks, ref/!/:=, integer arithmetic. 24 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/sieve.ml | 22 ++++++++++++++++++++++ plans/ocaml-on-sx.md | 4 ++++ 3 files changed, 27 insertions(+) create mode 100644 lib/ocaml/baseline/sieve.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 343c9c3b..8c254deb 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -22,6 +22,7 @@ "queens.ml": 2, "quicksort.ml": 44, "roman.ml": 44, + "sieve.ml": 15, "sum_squares.ml": 385, "word_count.ml": 3 } diff --git a/lib/ocaml/baseline/sieve.ml b/lib/ocaml/baseline/sieve.ml new file mode 100644 index 00000000..991dbcc8 --- /dev/null +++ b/lib/ocaml/baseline/sieve.ml @@ -0,0 +1,22 @@ +let count_primes n = + let sieve = Array.make (n + 1) true in + sieve.(0) <- false; + sieve.(1) <- false; + for i = 2 to n do + if sieve.(i) then begin + let j = ref (i * i) in + while !j <= n do + sieve.(!j) <- false; + j := !j + i + done + end + done; + let count = ref 0 in + for i = 2 to n do + if sieve.(i) then count := !count + 1 + done; + !count + +;; + +count_primes 50 diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 3163d670..9c6c2e00 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,10 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — sieve.ml baseline (Sieve of Eratosthenes, + count of primes ≤ 50 = 15). Stresses Array.make + arr.(i) + + arr.(i) <- v + nested for/while loops + `begin..end` block. 24 + baseline programs total. - 2026-05-09 Phase 4 — `arr.(i)` and `arr.(i) <- v` array indexing syntax (+3 tests, 515 total). parse-atom-postfix's `.(...)` branch now disambiguates between let-open and array-get based on whether From f68ea63e4665eb34144540fdaaea7c007c6415f8 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 02:24:45 +0000 Subject: [PATCH 099/298] ocaml: phase 5.1 brainfuck.ml baseline (subset interpreter) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Five '+++++.' groups, cumulative accumulator 5+10+15+20+25 = 75. This is a brainfuck *subset* — only > < + - . (no [ ] looping). That's intentional: the goal is to stress imperative idioms that the recently added Array module + array indexing syntax + s.[i] make ergonomic, all in one program. Exercises: Array.make 256 0 arr.(!ptr) arr.(!ptr) <- arr.(!ptr) + 1 prog.[!pc] ref / ! / := while + nested if/else if/else if for op dispatch 25 baseline programs total. --- lib/ocaml/baseline/brainfuck.ml | 20 ++++++++++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 6 ++++++ 3 files changed, 27 insertions(+) create mode 100644 lib/ocaml/baseline/brainfuck.ml diff --git a/lib/ocaml/baseline/brainfuck.ml b/lib/ocaml/baseline/brainfuck.ml new file mode 100644 index 00000000..3ab5e0bd --- /dev/null +++ b/lib/ocaml/baseline/brainfuck.ml @@ -0,0 +1,20 @@ +let interpret prog = + let mem = Array.make 256 0 in + let ptr = ref 0 in + let pc = ref 0 in + let n = String.length prog in + let acc = ref 0 in + while !pc < n do + let c = prog.[!pc] in + (if c = '>' then ptr := !ptr + 1 + else if c = '<' then ptr := !ptr - 1 + else if c = '+' then mem.(!ptr) <- mem.(!ptr) + 1 + else if c = '-' then mem.(!ptr) <- mem.(!ptr) - 1 + else if c = '.' then acc := !acc + mem.(!ptr)); + pc := !pc + 1 + done; + !acc + +;; + +interpret "+++++.+++++.+++++.+++++.+++++." diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 8c254deb..904c4111 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -2,6 +2,7 @@ "anagrams.ml": 3, "bfs.ml": 6, "btree.ml": 39, + "brainfuck.ml": 75, "caesar.ml": 215, "calc.ml": 13, "closures.ml": 315, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 9c6c2e00..6668bcc8 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,12 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — brainfuck.ml baseline (subset interpreter, + five `+++++.` groups → cumulative 5+10+15+20+25 = 75). No loop + brackets — the interpreter only handles `> < + - .`, but that's + enough to exercise Array.make, arr.(i), arr.(i) <- v, prog.[!pc], + ref/!/:=, while loop with conditional update via nested if/else. + 25 baseline programs total. - 2026-05-09 Phase 5.1 — sieve.ml baseline (Sieve of Eratosthenes, count of primes ≤ 50 = 15). Stresses Array.make + arr.(i) + arr.(i) <- v + nested for/while loops + `begin..end` block. 24 From 55fe1e4468dd09a7de837adb66bf73d94ff686cc Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 02:35:55 +0000 Subject: [PATCH 100/298] ocaml: phase 6 Array.sort/sub/append/exists/for_all/mem (+5 tests, 520 total) Eight new Array functions, all in OCaml syntax inside runtime.sx, delegating to the corresponding List operation on the cell's underlying list: sort cmp a -> a := List.sort cmp !a (* mutates the cell *) stable_sort = sort fast_sort = sort append a b -> ref (List.append !a !b) sub a pos n -> ref (take n (drop pos !a)) exists p -> List.exists p !a for_all p -> List.for_all p !a mem x a -> List.mem x !a Round-trip: let a = Array.of_list [3;1;4;1;5;9;2;6] in Array.sort compare a; Array.to_list a = [1;1;2;3;4;5;6;9] --- lib/ocaml/runtime.sx | 25 +++++++++++++++++++++++++ lib/ocaml/test.sh | 19 +++++++++++++++++++ plans/ocaml-on-sx.md | 6 ++++++ 3 files changed, 50 insertions(+) diff --git a/lib/ocaml/runtime.sx b/lib/ocaml/runtime.sx index 546afabb..b0ec999e 100644 --- a/lib/ocaml/runtime.sx +++ b/lib/ocaml/runtime.sx @@ -560,6 +560,31 @@ let to_list a = !a let of_list xs = ref xs let copy a = ref !a + let sort cmp a = a := List.sort cmp !a + let stable_sort = sort + let fast_sort = sort + + let append a b = ref (List.append !a !b) + + let sub a pos n = + let rec take xs k = + if k = 0 then [] + else match xs with + | [] -> [] + | h :: t -> h :: take t (k - 1) + in + let rec drop xs k = + if k = 0 then xs + else match xs with + | [] -> [] + | _ :: t -> drop t (k - 1) + in + ref (take (drop !a pos) n) + + let exists p a = List.exists p !a + let for_all p a = List.for_all p !a + let mem x a = List.mem x !a + let blit src si dst di n = for k = 0 to n - 1 do set dst (di + k) (get src (si + k)) diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 52a026eb..4cf26cf0 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1286,6 +1286,18 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 5032) (eval "(ocaml-run \"let a = Array.make 5 0 in for i = 0 to 4 do a.(i) <- i * i done; a.(3) + a.(4)\")") +;; ── Array.sort / sub / append / exists / mem ───────────────── +(epoch 5040) +(eval "(ocaml-run \"let a = Array.of_list [3;1;4;1;5;9;2;6] in Array.sort compare a; Array.to_list a\")") +(epoch 5041) +(eval "(ocaml-run \"let a = Array.of_list [10;20;30;40;50] in let b = Array.sub a 1 3 in Array.fold_left (+) 0 b\")") +(epoch 5042) +(eval "(ocaml-run \"let a = Array.of_list [1;2;3] in let b = Array.of_list [4;5;6] in Array.fold_left (+) 0 (Array.append a b)\")") +(epoch 5043) +(eval "(ocaml-run \"let a = Array.init 5 (fun i -> i * 2) in Array.exists (fun x -> x = 6) a\")") +(epoch 5044) +(eval "(ocaml-run \"let a = Array.of_list [1;2;3;4;5] in Array.mem 3 a\")") + EPOCHS OUTPUT=$(timeout 360 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -2043,6 +2055,13 @@ check 5030 "a.(2) <- 99; a.(2) + a.(0)" '106' check 5031 "Array.init 4 + sum a.(0..3)" '10' check 5032 "for + a.(i) <- i*i + sum" '25' +# ── Array.sort / sub / append / exists / mem ──────────────────── +check 5040 "Array.sort compare" '(1 1 2 3 4 5 6 9)' +check 5041 "Array.sub 1 3 sum" '90' +check 5042 "Array.append 6-len sum" '21' +check 5043 "Array.exists = 6" 'true' +check 5044 "Array.mem 3 [1..5]" 'true' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 6668bcc8..90a1154f 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,12 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 6 — Array.sort/stable_sort/fast_sort + sub + + append + exists + for_all + mem (+5 tests, 520 total). All + delegate to the corresponding List operation on the cell's + underlying list (sort mutates by replacing the cell, the rest are + pure observers). Array round-trip via of_list → sort → to_list + works as expected. - 2026-05-09 Phase 5.1 — brainfuck.ml baseline (subset interpreter, five `+++++.` groups → cumulative 5+10+15+20+25 = 75). No loop brackets — the interpreter only handles `> < + - .`, but that's From a0e8b64f5cf0bef20a130a6ccbb7a11298b79581 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 02:50:21 +0000 Subject: [PATCH 101/298] ocaml: phase 4 integer division semantics + Int module + max_int/min_int (+5 tests, 525 total) Three things in this commit: 1. Integer / is now truncate-toward-zero on ints, IEEE on floats. The eval-op handler for '/' checks (number? + (= (round x) x)) on both sides; if both integral, applies host floor/ceil based on sign; otherwise falls through to host '/'. 2. Fixes Int.rem, which was returning 0 because (a - b * (a / b)) was using float division: 17 - 5 * 3.4 = 0.0. Now Int.rem 17 5 = 2. 3. Int module fleshed out: max_int / min_int / zero / one / minus_one, succ / pred / neg, add / sub / mul / div / rem, equal, compare. Also adds globals: max_int, min_int, abs_float, float_of_int, int_of_float (the latter two are identity in our dynamic runtime). 17 / 5 = 3 -17 / 5 = -3 (trunc toward zero) Int.rem 17 5 = 2 Int.compare 5 3 = 1 --- lib/ocaml/eval.sx | 10 +++++++++- lib/ocaml/runtime.sx | 20 ++++++++++++++++++++ lib/ocaml/test.sh | 19 +++++++++++++++++++ plans/ocaml-on-sx.md | 7 +++++++ 4 files changed, 55 insertions(+), 1 deletion(-) diff --git a/lib/ocaml/eval.sx b/lib/ocaml/eval.sx index 0f2ee6e5..f14a8ef3 100644 --- a/lib/ocaml/eval.sx +++ b/lib/ocaml/eval.sx @@ -418,7 +418,15 @@ ((= op "+") (+ lhs rhs)) ((= op "-") (- lhs rhs)) ((= op "*") (* lhs rhs)) - ((= op "/") (/ lhs rhs)) + ((= op "/") + ;; OCaml's `/` is integer division on ints, float on floats. + ;; Pick floor on ints. + (cond + ((and (number? lhs) (number? rhs) + (= (round lhs) lhs) (= (round rhs) rhs)) + (let ((q (/ lhs rhs))) + (if (>= q 0) (floor q) (ceil q)))) + (else (/ lhs rhs)))) ((= op "+.") (+ lhs rhs)) ((= op "-.") (- lhs rhs)) ((= op "*.") (* lhs rhs)) diff --git a/lib/ocaml/runtime.sx b/lib/ocaml/runtime.sx index b0ec999e..24b86568 100644 --- a/lib/ocaml/runtime.sx +++ b/lib/ocaml/runtime.sx @@ -466,6 +466,21 @@ let abs n = if n < 0 then 0 - n else n let max a b = if a > b then a else b let min a b = if a < b then a else b + let max_int = 4611686018427387903 + let min_int = -4611686018427387904 + let zero = 0 + let one = 1 + let minus_one = -1 + let succ n = n + 1 + let pred n = n - 1 + let neg n = 0 - n + let add a b = a + b + let sub a b = a - b + let mul a b = a * b + let div a b = a / b + let rem a b = a - b * (a / b) + let equal a b = a = b + let compare a b = if a < b then -1 else if a > b then 1 else 0 end ;; module Float = struct @@ -832,6 +847,11 @@ let string_of_float f = _string_of_float f let string_of_bool b = if b then \"true\" else \"false\" let int_of_string s = _int_of_string s + let max_int = 4611686018427387903 + let min_int = -4611686018427387904 + let abs_float f = if f < 0.0 then 0.0 -. f else f + let float_of_int n = n + let int_of_float f = f ") (define ocaml-stdlib-loaded false) diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 4cf26cf0..fe118bc5 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1298,6 +1298,18 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 5044) (eval "(ocaml-run \"let a = Array.of_list [1;2;3;4;5] in Array.mem 3 a\")") +;; ── Integer division + Int module + max_int ────────────────── +(epoch 5050) +(eval "(ocaml-run \"17 / 5\")") +(epoch 5051) +(eval "(ocaml-run \"-17 / 5\")") +(epoch 5052) +(eval "(ocaml-run \"Int.rem 17 5\")") +(epoch 5053) +(eval "(ocaml-run \"Int.compare 5 3\")") +(epoch 5054) +(eval "(ocaml-run \"max_int + min_int\")") + EPOCHS OUTPUT=$(timeout 360 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -2062,6 +2074,13 @@ check 5042 "Array.append 6-len sum" '21' check 5043 "Array.exists = 6" 'true' check 5044 "Array.mem 3 [1..5]" 'true' +# ── Int module + integer division ─────────────────────────────── +check 5050 "17 / 5 = 3 (int div)" '3' +check 5051 "-17 / 5 = -3 (trunc-zero)" '-3' +check 5052 "Int.rem 17 5" '2' +check 5053 "Int.compare 5 3" '1' +check 5054 "max_int + min_int (host int)" '0' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 90a1154f..455ef84c 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 4 — integer `/` is now truncate-toward-zero on + ints, IEEE on floats. Both operands integral → host floor/ceil based + on sign; otherwise host `/`. Fixes `Int.rem` (which was returning 0 + for `Int.rem 17 5` because `a / b` was producing a float). Also adds + Int.{max_int,min_int,zero,one,minus_one,succ,pred,neg,add,sub,mul, + div,rem,equal,compare} and global max_int/min_int/abs_float/ + float_of_int/int_of_float (+5 tests, 525 total). - 2026-05-09 Phase 6 — Array.sort/stable_sort/fast_sort + sub + append + exists + for_all + mem (+5 tests, 520 total). All delegate to the corresponding List operation on the cell's From 8188a82a587d8edb06656455bc0361ffdbcd267a Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 03:01:28 +0000 Subject: [PATCH 102/298] ocaml: phase 6 List.sort upgraded to mergesort (+3 tests, 528 total) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous List.sort was O(n^2) insertion sort. Replaced with a straightforward mergesort: split lst -> alternating-take into ([odd], [even]) merge xs ys -> classic two-finger merge under cmp sort cmp xs -> base cases [], [x]; otherwise split + recursive sort on each half + merge Tuple destructuring on the split result is expressed via nested match — let-tuple-destructuring would be cleaner but works today. This benefits sort_uniq (which calls sort first), Set.Make.add via sort etc., and any user program using List.sort. Stable_sort is already aliased to sort. --- lib/ocaml/runtime.sx | 26 +++++++++++++++++++++----- lib/ocaml/test.sh | 13 +++++++++++++ plans/ocaml-on-sx.md | 9 +++++++++ 3 files changed, 43 insertions(+), 5 deletions(-) diff --git a/lib/ocaml/runtime.sx b/lib/ocaml/runtime.sx index 24b86568..b1b9bf04 100644 --- a/lib/ocaml/runtime.sx +++ b/lib/ocaml/runtime.sx @@ -146,14 +146,30 @@ let rec sort cmp xs = begin - let rec insert x ys = - match ys with - | [] -> [x] - | h :: t -> if cmp x h <= 0 then x :: ys else h :: insert x t + let rec split lst = + match lst with + | [] -> ([], []) + | [x] -> ([x], []) + | x :: y :: rest -> + (match split rest with + | (a, b) -> (x :: a, y :: b)) + in + let rec merge xs ys = + match xs with + | [] -> ys + | x :: xs' -> + (match ys with + | [] -> xs + | y :: ys' -> + if cmp x y <= 0 then x :: merge xs' (y :: ys') + else y :: merge (x :: xs') ys') in match xs with | [] -> [] - | h :: t -> insert h (sort cmp t) + | [x] -> [x] + | _ -> + (match split xs with + | (a, b) -> merge (sort cmp a) (sort cmp b)) end let stable_sort = sort diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index fe118bc5..ecd7e10e 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1310,6 +1310,14 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 5054) (eval "(ocaml-run \"max_int + min_int\")") +;; ── List.sort mergesort ────────────────────────────────────── +(epoch 5060) +(eval "(ocaml-run \"List.sort compare [5;2;8;1;9;3;7;4;6]\")") +(epoch 5061) +(eval "(ocaml-run \"List.sort (fun a b -> b - a) [3;1;4;1;5]\")") +(epoch 5062) +(eval "(ocaml-run \"List.length (List.sort compare [9;8;7;6;5;4;3;2;1;0])\")") + EPOCHS OUTPUT=$(timeout 360 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -2081,6 +2089,11 @@ check 5052 "Int.rem 17 5" '2' check 5053 "Int.compare 5 3" '1' check 5054 "max_int + min_int (host int)" '0' +# ── List.sort mergesort ───────────────────────────────────────── +check 5060 "sort 9-element list" '(1 2 3 4 5 6 7 8 9)' +check 5061 "sort with reverse cmp" '(5 4 3 1 1)' +check 5062 "sort 10 reversed -> length" '10' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 455ef84c..7811c7d3 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,15 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 6 — List.sort upgraded from O(n²) insertion sort + to O(n log n) mergesort (+3 tests, 528 total). split + merge are + inner functions of sort; tuple destructuring on the split result is + expressed via nested match (pattern parser needs explicit + paren-wrapping of tuple patterns inside match arms in some places — + inline let-tuple destructuring on a match RHS would be cleaner if + multi-binding `let (a, b) = ...` were promoted, but this works + today). Should make sort-using baselines noticeably faster on + larger lists; existing sort_uniq automatically benefits. - 2026-05-09 Phase 4 — integer `/` is now truncate-toward-zero on ints, IEEE on floats. Both operands integral → host floor/ceil based on sign; otherwise host `/`. Fixes `Int.rem` (which was returning 0 From cb14a074133543f2d80f80695d6ce150a31b6205 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 03:12:28 +0000 Subject: [PATCH 103/298] ocaml: phase 6 Printf %i/%u/%x/%X/%o + int_to_hex/octal host primitives (+5 tests, 533 total) Three new host primitives in eval.sx: _int_to_hex_lower n -> string of hex digits (lowercase) _int_to_hex_upper n -> string of hex digits (uppercase) _int_to_octal n -> string of octal digits Each builds the digit string by repeated floor(n / base) + mod, prepending the digit at each step. Negative numbers prefix '-' so the output round-trips through int_of_string with a sign. Printf walker now fans out: %d, %i, %u -> _string_of_int %f -> _string_of_float %x -> _int_to_hex_lower %X -> _int_to_hex_upper %o -> _int_to_octal %s, %c, %b -> existing handling Printf.sprintf '%x' 255 = 'ff' Printf.sprintf '%X' 4096 = '1000' Printf.sprintf '%o' 8 = '10' Printf.sprintf '%x %X %o' 255 4096 8 = 'ff 1000 10' --- lib/ocaml/eval.sx | 55 ++++++++++++++++++++++++++++++++++++++++++++ lib/ocaml/runtime.sx | 12 +++++++--- lib/ocaml/test.sh | 19 +++++++++++++++ plans/ocaml-on-sx.md | 7 ++++++ 4 files changed, 90 insertions(+), 3 deletions(-) diff --git a/lib/ocaml/eval.sx b/lib/ocaml/eval.sx index f14a8ef3..3b3afb9e 100644 --- a/lib/ocaml/eval.sx +++ b/lib/ocaml/eval.sx @@ -70,6 +70,61 @@ (list "_int_of_string" (fn (s) (parse-number s))) (list "_string_of_int" (fn (i) (str i))) (list "_string_of_float" (fn (f) (str f))) + ;; Integer formatting helpers used by Printf %x/%X/%o. + (list "_int_to_hex_lower" + (fn (n) + (cond + ((= n 0) "0") + (else + (let ((digits "0123456789abcdef") + (m (if (< n 0) (- 0 n) n)) + (out "")) + (begin + (define loop + (fn () + (when (> m 0) + (begin + (set! out (str (nth digits (mod m 16)) out)) + (set! m (floor (/ m 16))) + (loop))))) + (loop) + (if (< n 0) (str "-" out) out))))))) + (list "_int_to_hex_upper" + (fn (n) + (cond + ((= n 0) "0") + (else + (let ((digits "0123456789ABCDEF") + (m (if (< n 0) (- 0 n) n)) + (out "")) + (begin + (define loop + (fn () + (when (> m 0) + (begin + (set! out (str (nth digits (mod m 16)) out)) + (set! m (floor (/ m 16))) + (loop))))) + (loop) + (if (< n 0) (str "-" out) out))))))) + (list "_int_to_octal" + (fn (n) + (cond + ((= n 0) "0") + (else + (let ((digits "01234567") + (m (if (< n 0) (- 0 n) n)) + (out "")) + (begin + (define loop + (fn () + (when (> m 0) + (begin + (set! out (str (nth digits (mod m 8)) out)) + (set! m (floor (/ m 8))) + (loop))))) + (loop) + (if (< n 0) (str "-" out) out))))))) (list "_char_code" (fn (c) (char-code c))) (list "_char_chr" (fn (n) (char-from-code n))) ;; Print: route to host SX `display` (no automatic newline). diff --git a/lib/ocaml/runtime.sx b/lib/ocaml/runtime.sx index b1b9bf04..280a9fc6 100644 --- a/lib/ocaml/runtime.sx +++ b/lib/ocaml/runtime.sx @@ -524,12 +524,18 @@ else if pos + 1 < n && _string_get fmt pos = \"%\" then let spec = _string_get fmt (pos + 1) in if spec = \"%\" then walk (pos + 2) (prefix ^ \"%\") - else if spec = \"d\" || spec = \"s\" || spec = \"f\" - || spec = \"c\" || spec = \"b\" then + else if spec = \"d\" || spec = \"i\" || spec = \"s\" + || spec = \"f\" || spec = \"c\" || spec = \"b\" + || spec = \"x\" || spec = \"X\" || spec = \"o\" + || spec = \"u\" then (fun arg -> let s = - if spec = \"d\" then _string_of_int arg + if spec = \"d\" || spec = \"i\" || spec = \"u\" + then _string_of_int arg else if spec = \"f\" then _string_of_float arg + else if spec = \"x\" then _int_to_hex_lower arg + else if spec = \"X\" then _int_to_hex_upper arg + else if spec = \"o\" then _int_to_octal arg else if spec = \"b\" then (if arg then \"true\" else \"false\") else arg diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index ecd7e10e..6501e0bd 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1318,6 +1318,18 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 5062) (eval "(ocaml-run \"List.length (List.sort compare [9;8;7;6;5;4;3;2;1;0])\")") +;; ── Printf %i %x %X %o ───────────────────────────────────────── +(epoch 5070) +(eval "(ocaml-run \"Printf.sprintf \\\"%i\\\" 42\")") +(epoch 5071) +(eval "(ocaml-run \"Printf.sprintf \\\"%x\\\" 255\")") +(epoch 5072) +(eval "(ocaml-run \"Printf.sprintf \\\"%X\\\" 4096\")") +(epoch 5073) +(eval "(ocaml-run \"Printf.sprintf \\\"%o\\\" 8\")") +(epoch 5074) +(eval "(ocaml-run \"Printf.sprintf \\\"%x %X %o\\\" 255 4096 8\")") + EPOCHS OUTPUT=$(timeout 360 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -2094,6 +2106,13 @@ check 5060 "sort 9-element list" '(1 2 3 4 5 6 7 8 9)' check 5061 "sort with reverse cmp" '(5 4 3 1 1)' check 5062 "sort 10 reversed -> length" '10' +# ── Printf %i %x %X %o ────────────────────────────────────────── +check 5070 "%i 42" '"42"' +check 5071 "%x 255" '"ff"' +check 5072 "%X 4096" '"1000"' +check 5073 "%o 8" '"10"' +check 5074 "%x %X %o multi" '"ff 1000 10"' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 7811c7d3..07348aca 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 6 — Printf.sprintf adds %i, %u (aliases of %d), + %x (lowercase hex), %X (uppercase hex), %o (octal) (+5 tests, 533 + total). New host primitives `_int_to_hex_lower`, `_int_to_hex_upper`, + `_int_to_octal` build the digit string by repeated host + `floor (/ n base)` + `mod`. The Printf walker fans out specs to the + right host helper. Examples: `%x` 255 = "ff", `%X` 4096 = "1000", + `%o` 8 = "10", multi: `%x %X %o` 255 4096 8 = "ff 1000 10". - 2026-05-09 Phase 6 — List.sort upgraded from O(n²) insertion sort to O(n log n) mergesort (+3 tests, 528 total). split + merge are inner functions of sort; tuple destructuring on the split result is From 7e64695a74dce954bf3bf87f69f36b2e0d750446 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 03:25:50 +0000 Subject: [PATCH 104/298] ocaml: phase 6 Printf width specifiers %5d/%-5d/%05d/%4s (+5 tests, 538 total) The Printf walker now parses optional flags + width digits between '%' and the spec letter: - left-align (default is right-align) 0 zero-pad (default is space-pad; only honoured when not left-aligned) Nd... decimal width digits (any number) After formatting the argument into a base string with the existing spec dispatch (%d/%i/%u/%s/%f/%c/%b/%x/%X/%o), the result is padded to the requested width. Workaround: width and spec_pos are returned packed as width * 1000000 + spec_pos because the parser does not yet support tuple destructuring in let ('let (a, b) = expr in body' fails with 'expected ident'). TODO: lift that limitation; for now the encoding round-trips losslessly for any practical width. Printf.sprintf '%5d' 42 = ' 42' Printf.sprintf '%-5d|' 42 = '42 |' Printf.sprintf '%05d' 42 = '00042' Printf.sprintf '%4s' 'hi' = ' hi' Printf.sprintf 'hi=%-3d, hex=%04x' 9 15 = 'hi=9 , hex=000f' --- lib/ocaml/runtime.sx | 103 ++++++++++++++++++++++++++++++++----------- lib/ocaml/test.sh | 19 ++++++++ plans/ocaml-on-sx.md | 11 +++++ 3 files changed, 108 insertions(+), 25 deletions(-) diff --git a/lib/ocaml/runtime.sx b/lib/ocaml/runtime.sx index 280a9fc6..56ba3f7e 100644 --- a/lib/ocaml/runtime.sx +++ b/lib/ocaml/runtime.sx @@ -512,36 +512,89 @@ end ;; module Printf = struct - (* sprintf walks fmt, accumulating prefix. When it sees a %X - spec, it returns a function of one arg that substitutes the - arg and recurses on the rest of fmt. With no specs, returns - the bare format string. Specs supported: %d %s %f %c %b - (and %% as a literal). Unknown specs are passed through. *) + (* sprintf walks fmt char-by-char. On '%' it parses optional + flags ('-' for left-justify, '0' for zero-pad), an optional + decimal width, and a final spec letter. Specs supported: + %d %i %u %s %f %c %b %x %X %o (and %% as a literal). + Width pads the formatted argument to at least N characters. *) let sprintf fmt = let n = _string_length fmt in + let is_spec c = + c = \"d\" || c = \"i\" || c = \"u\" || c = \"s\" || c = \"f\" + || c = \"c\" || c = \"b\" || c = \"x\" || c = \"X\" || c = \"o\" + in + let is_digit c = + let k = _char_code c in k >= 48 && k <= 57 + in + let pad s width left zero = + let pad_len = width - _string_length s in + if pad_len <= 0 then s + else + let ch = if zero && (not left) then \"0\" else \" \" in + let rec mk k acc = if k = 0 then acc else mk (k - 1) (acc ^ ch) in + let padding = mk pad_len \"\" in + if left then s ^ padding else padding ^ s + in + (* Skip flag chars from p, returning new pos. Records flags in + shared refs (set above each call). *) + let parse_flags_loop p left_flag zero_flag = + let i = ref p in + let cont = ref true in + while !cont do + if !i < n then + let c = _string_get fmt !i in + if c = \"-\" then (left_flag := true; i := !i + 1) + else if c = \"0\" then (zero_flag := true; i := !i + 1) + else cont := false + else cont := false + done; + !i + in + let parse_width_loop p = + let i = ref p in + let w = ref 0 in + let cont = ref true in + while !cont do + if !i < n then + let c = _string_get fmt !i in + if is_digit c then + (w := !w * 10 + (_char_code c - 48); i := !i + 1) + else cont := false + else cont := false + done; + (!w) * 1000000 + (!i) + in let rec walk pos prefix = if pos >= n then prefix else if pos + 1 < n && _string_get fmt pos = \"%\" then - let spec = _string_get fmt (pos + 1) in - if spec = \"%\" then walk (pos + 2) (prefix ^ \"%\") - else if spec = \"d\" || spec = \"i\" || spec = \"s\" - || spec = \"f\" || spec = \"c\" || spec = \"b\" - || spec = \"x\" || spec = \"X\" || spec = \"o\" - || spec = \"u\" then - (fun arg -> - let s = - if spec = \"d\" || spec = \"i\" || spec = \"u\" - then _string_of_int arg - else if spec = \"f\" then _string_of_float arg - else if spec = \"x\" then _int_to_hex_lower arg - else if spec = \"X\" then _int_to_hex_upper arg - else if spec = \"o\" then _int_to_octal arg - else if spec = \"b\" then - (if arg then \"true\" else \"false\") - else arg - in - walk (pos + 2) (prefix ^ s)) - else walk (pos + 1) (prefix ^ _string_get fmt pos) + if _string_get fmt (pos + 1) = \"%\" then + walk (pos + 2) (prefix ^ \"%\") + else + let left_flag = ref false in + let zero_flag = ref false in + let after_flags = parse_flags_loop (pos + 1) left_flag zero_flag in + let packed = parse_width_loop after_flags in + let width = packed / 1000000 in + let spec_pos = packed - width * 1000000 in + if spec_pos < n && is_spec (_string_get fmt spec_pos) then + let spec = _string_get fmt spec_pos in + let left = !left_flag in + let zero = !zero_flag in + (fun arg -> + let raw = + if spec = \"d\" || spec = \"i\" || spec = \"u\" + then _string_of_int arg + else if spec = \"f\" then _string_of_float arg + else if spec = \"x\" then _int_to_hex_lower arg + else if spec = \"X\" then _int_to_hex_upper arg + else if spec = \"o\" then _int_to_octal arg + else if spec = \"b\" then + (if arg then \"true\" else \"false\") + else arg + in + let s = pad raw width left zero in + walk (spec_pos + 1) (prefix ^ s)) + else walk (pos + 1) (prefix ^ _string_get fmt pos) else walk (pos + 1) (prefix ^ _string_get fmt pos) in walk 0 \"\" diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 6501e0bd..6b12658f 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1330,6 +1330,18 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 5074) (eval "(ocaml-run \"Printf.sprintf \\\"%x %X %o\\\" 255 4096 8\")") +;; ── Printf width specifiers ───────────────────────────────── +(epoch 5080) +(eval "(ocaml-run \"Printf.sprintf \\\"%5d\\\" 42\")") +(epoch 5081) +(eval "(ocaml-run \"Printf.sprintf \\\"%-5d|\\\" 42\")") +(epoch 5082) +(eval "(ocaml-run \"Printf.sprintf \\\"%05d\\\" 42\")") +(epoch 5083) +(eval "(ocaml-run \"Printf.sprintf \\\"%4s\\\" \\\"hi\\\"\")") +(epoch 5084) +(eval "(ocaml-run \"Printf.sprintf \\\"hi=%-3d, hex=%04x\\\" 9 15\")") + EPOCHS OUTPUT=$(timeout 360 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -2113,6 +2125,13 @@ check 5072 "%X 4096" '"1000"' check 5073 "%o 8" '"10"' check 5074 "%x %X %o multi" '"ff 1000 10"' +# ── Printf width specifiers ───────────────────────────────────── +check 5080 "%5d 42 right-pad" '" 42"' +check 5081 "%-5d| 42 left-pad" '"42 |"' +check 5082 "%05d 42 zero-pad" '"00042"' +check 5083 "%4s hi" '" hi"' +check 5084 "%-3d %04x mixed" '"hi=9 , hex=000f"' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 07348aca..6f48af2c 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,17 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 6 — Printf width specifiers `%5d` / `%-5d` / + `%05d` / `%4s` etc. (+5 tests, 538 total). Walker now parses + optional `-` (left-align) and `0` (zero-pad) flags after `%`, then + optional decimal width digits, then the spec letter. After + formatting the arg into a base string, pads to the width using + spaces (or zeros if `0` flag and not `-`). Encoded width+spec_pos + return as `width * 1000000 + spec_pos` because the parser does not + yet support tuple destructuring in `let` (TODO: lift that + limitation; for now this round-trips losslessly for any practical + width). Examples: `%5d` 42 = " 42", `%-5d|` 42 = "42 |", + `%05d` 42 = "00042". - 2026-05-09 Phase 6 — Printf.sprintf adds %i, %u (aliases of %d), %x (lowercase hex), %X (uppercase hex), %o (octal) (+5 tests, 533 total). New host primitives `_int_to_hex_lower`, `_int_to_hex_upper`, From dab8718289464d626b66e30538bbe1112f15eac0 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 03:40:38 +0000 Subject: [PATCH 105/298] ocaml: phase 4 'let PATTERN = expr in body' tuple destructuring (+3 tests, 541 total) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When 'let' is followed by '(', parse-let now reads a full pattern (via the existing parse-pattern used by match), expects '=', then 'in', and desugars to: let PATTERN = EXPR in BODY => match EXPR with PATTERN -> BODY This reuses the entire pattern-matching machinery, so any pattern the match parser accepts works here too — paren-tuples, nested tuples, cons patterns, list patterns. No 'rec' allowed for pattern bindings (real OCaml's restriction). let (a, b) = (1, 2) in a + b = 3 let (a, b, c) = (10, 20, 30) in a + b + c = 60 let pair = (5, 7) in let (x, y) = pair in x * y = 35 Also retroactively cleaned up Printf's iter-97 width-pos packing hack ('width * 1000000 + spec_pos') — it's now 'let (width, spec_pos) = parse_width_loop after_flags in ...' like real OCaml. --- lib/ocaml/parser.sx | 13 +++++++++++++ lib/ocaml/runtime.sx | 6 ++---- lib/ocaml/test.sh | 13 +++++++++++++ plans/ocaml-on-sx.md | 8 ++++++++ 4 files changed, 36 insertions(+), 4 deletions(-) diff --git a/lib/ocaml/parser.sx b/lib/ocaml/parser.sx index f84e6263..fd7db26a 100644 --- a/lib/ocaml/parser.sx +++ b/lib/ocaml/parser.sx @@ -765,6 +765,19 @@ (consume! "keyword" "in") (let ((body (parse-expr))) (list :let-open path body)))))) + ;; `let PATTERN = expr in body` — non-trivial pattern + ;; desugars to `match expr with PATTERN -> body`. Triggers + ;; on `(` (paren-tuple, possibly nested) immediately after + ;; `let` (no `rec` allowed for pattern bindings). + ((at-op? "(") + (let ((pat (parse-pattern))) + (begin + (consume! "op" "=") + (let ((rhs (parse-expr))) + (begin + (consume! "keyword" "in") + (let ((body (parse-expr))) + (list :match rhs (list (list :case pat body))))))))) (else (let ((reccy false) (bindings (list))) (begin diff --git a/lib/ocaml/runtime.sx b/lib/ocaml/runtime.sx index 56ba3f7e..dbd6b7d0 100644 --- a/lib/ocaml/runtime.sx +++ b/lib/ocaml/runtime.sx @@ -562,7 +562,7 @@ else cont := false else cont := false done; - (!w) * 1000000 + (!i) + (!w, !i) in let rec walk pos prefix = if pos >= n then prefix @@ -573,9 +573,7 @@ let left_flag = ref false in let zero_flag = ref false in let after_flags = parse_flags_loop (pos + 1) left_flag zero_flag in - let packed = parse_width_loop after_flags in - let width = packed / 1000000 in - let spec_pos = packed - width * 1000000 in + let (width, spec_pos) = parse_width_loop after_flags in if spec_pos < n && is_spec (_string_get fmt spec_pos) then let spec = _string_get fmt spec_pos in let left = !left_flag in diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 6b12658f..f395cc78 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1342,6 +1342,14 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 5084) (eval "(ocaml-run \"Printf.sprintf \\\"hi=%-3d, hex=%04x\\\" 9 15\")") +;; ── let (a, b) = expr in body — tuple destructure ───────────── +(epoch 5090) +(eval "(ocaml-run \"let (a, b) = (1, 2) in a + b\")") +(epoch 5091) +(eval "(ocaml-run \"let (a, b, c) = (10, 20, 30) in a + b + c\")") +(epoch 5092) +(eval "(ocaml-run \"let pair = (5, 7) in let (x, y) = pair in x * y\")") + EPOCHS OUTPUT=$(timeout 360 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -2132,6 +2140,11 @@ check 5082 "%05d 42 zero-pad" '"00042"' check 5083 "%4s hi" '" hi"' check 5084 "%-3d %04x mixed" '"hi=9 , hex=000f"' +# ── let (a, b) = expr in body ─────────────────────────────────── +check 5090 "let (a, b) = (1,2)" '3' +check 5091 "let (a, b, c) = (10,20,30)" '60' +check 5092 "let pair; let (x, y) = pair" '35' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 6f48af2c..ae9de5b5 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,14 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 4 — `let PATTERN = expr in body` tuple + destructuring (+3 tests, 541 total). When `let` is followed by `(`, + parse-let now reads a full pattern, expects `=`, then `in`, and + desugars to `(:match expr ((:case PATTERN body)))`. Reuses the + pattern parser used by match. `let (a, b, c) = (10, 20, 30) in + a+b+c` → 60. Also retroactively cleans up the Printf width-pos + packing hack from iteration 97 — it's now `let (width, spec_pos) + = parse_width_loop after_flags in ...` like real OCaml. - 2026-05-09 Phase 6 — Printf width specifiers `%5d` / `%-5d` / `%05d` / `%4s` etc. (+5 tests, 538 total). Walker now parses optional `-` (left-align) and `0` (zero-pad) flags after `%`, then From 41190c6d23ec32bef772ff9251ff9fd643ed21f3 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 03:59:20 +0000 Subject: [PATCH 106/298] ocaml: phase 6 Hashtbl.keys/values/bindings/remove/clear (+4 tests, 545 total) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two new host primitives: _hashtbl_remove t k -> dissoc the key from the underlying dict _hashtbl_clear t -> reset the cell to {} Eight new OCaml-syntax helpers in runtime.sx Hashtbl module: bindings t = _hashtbl_to_list t keys t = List.map (fun (k, _) -> k) (...) values t = List.map (fun (_, v) -> v) (...) to_seq t = bindings t to_seq_keys / to_seq_values remove / clear / reset The keys/values implementations use a 'fun pair -> match pair with (k, _) -> k' indirection because parse-fun does not currently allow tuple patterns directly on parameters. Same restriction we worked around in iteration 98's let-pattern desugaring. Also: a detour attempting to add top-level 'let (a, b) = expr' support was started but reverted — parse-decl-let in the outer ocaml-parse-program scope does not have access to parse-pattern (which is local to ocaml-parse). Will need a slice + re-parse trick later. --- lib/ocaml/eval.sx | 11 +++++++++++ lib/ocaml/runtime.sx | 21 +++++++++++++++++++++ lib/ocaml/test.sh | 16 ++++++++++++++++ plans/ocaml-on-sx.md | 10 ++++++++++ 4 files changed, 58 insertions(+) diff --git a/lib/ocaml/eval.sx b/lib/ocaml/eval.sx index 3b3afb9e..f22357d7 100644 --- a/lib/ocaml/eval.sx +++ b/lib/ocaml/eval.sx @@ -171,6 +171,17 @@ (fn (t) (fn (k) (has-key? (nth t 0) (str k))))) (list "_hashtbl_length" (fn (t) (len (keys (nth t 0))))) + (list "_hashtbl_remove" + (fn (t) (fn (k) + (let ((d (nth t 0))) + (begin + (set-nth! t 0 (dissoc d (str k))) + nil))))) + (list "_hashtbl_clear" + (fn (t) + (begin + (set-nth! t 0 {}) + nil))) ;; _lazy_force: evaluate the thunk on first force, cache result. ;; cell: one-elt list whose value is ("Thunk" expr env) or ;; ("Forced" v). diff --git a/lib/ocaml/runtime.sx b/lib/ocaml/runtime.sx index dbd6b7d0..8f40331b 100644 --- a/lib/ocaml/runtime.sx +++ b/lib/ocaml/runtime.sx @@ -774,6 +774,27 @@ | (k, v) :: rest -> go rest (f k v a) in go (_hashtbl_to_list t) acc + + (* OCaml's Hashtbl doesn't expose `keys` directly — it offers + `to_seq_keys` etc. We provide both: a plain list snapshot and + the keys/values projections that are commonly useful. *) + let bindings t = _hashtbl_to_list t + + let keys t = + List.map (fun pair -> match pair with (k, _) -> k) + (_hashtbl_to_list t) + + let values t = + List.map (fun pair -> match pair with (_, v) -> v) + (_hashtbl_to_list t) + + let to_seq t = _hashtbl_to_list t + let to_seq_keys = keys + let to_seq_values = values + + let remove t k = _hashtbl_remove t k + let reset t = _hashtbl_clear t + let clear t = _hashtbl_clear t end ;; module Map = struct diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index f395cc78..c17c1f92 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1350,6 +1350,16 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 5092) (eval "(ocaml-run \"let pair = (5, 7) in let (x, y) = pair in x * y\")") +;; ── Hashtbl.keys / values / remove / clear ──────────────────── +(epoch 5100) +(eval "(ocaml-run \"let t = Hashtbl.create 4 in Hashtbl.add t \\\"a\\\" 1; Hashtbl.add t \\\"b\\\" 2; List.length (Hashtbl.keys t)\")") +(epoch 5101) +(eval "(ocaml-run \"let t = Hashtbl.create 4 in Hashtbl.add t \\\"a\\\" 5; Hashtbl.add t \\\"b\\\" 7; List.fold_left (+) 0 (Hashtbl.values t)\")") +(epoch 5102) +(eval "(ocaml-run \"let t = Hashtbl.create 4 in Hashtbl.add t \\\"x\\\" 1; Hashtbl.add t \\\"y\\\" 2; Hashtbl.remove t \\\"x\\\"; Hashtbl.length t\")") +(epoch 5103) +(eval "(ocaml-run \"let t = Hashtbl.create 4 in Hashtbl.add t \\\"a\\\" 1; Hashtbl.clear t; Hashtbl.length t\")") + EPOCHS OUTPUT=$(timeout 360 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -2145,6 +2155,12 @@ check 5090 "let (a, b) = (1,2)" '3' check 5091 "let (a, b, c) = (10,20,30)" '60' check 5092 "let pair; let (x, y) = pair" '35' +# ── Hashtbl.keys/values/remove/clear ──────────────────────────── +check 5100 "Hashtbl.keys length" '2' +check 5101 "Hashtbl.values sum" '12' +check 5102 "Hashtbl.remove + length" '1' +check 5103 "Hashtbl.clear + length" '0' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index ae9de5b5..e95cc8dd 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,16 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 6 — Hashtbl.keys / values / bindings / remove / + clear / reset / to_seq / to_seq_keys / to_seq_values (+4 tests, 545 + total). Two new host primitives `_hashtbl_remove` and + `_hashtbl_clear`; the rest are pure OCaml-syntax helpers in + runtime.sx that map over `_hashtbl_to_list`. `keys` and `values` + pattern-match the (k, v) tuples to extract one side. Note: a + detour to also support top-level `let (a, b) = expr` was reverted + — `parse-decl-let` lives in the outer ocaml-parse-program scope + which doesn't have access to parse-pattern; will need a slice + + inner-parse trick later. - 2026-05-09 Phase 4 — `let PATTERN = expr in body` tuple destructuring (+3 tests, 541 total). When `let` is followed by `(`, parse-let now reads a full pattern, expects `=`, then `in`, and From 8ca3ef342d2b03cb506836a325dae306293f845a Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 04:12:16 +0000 Subject: [PATCH 107/298] ocaml: phase 6 Random module (deterministic LCG) (+4 tests, 549 total) Linear-congruential PRNG with mutable seed (_state ref). API: init s seed the PRNG self_init () default seed (1) int bound 0 <= n < bound bool () fair coin float bound uniform in [0, bound) bits () 30 bits Stepping rule: state := (state * 1103515245 + 12345) mod 2147483647 result := |state| mod bound Same seed reproduces the same sequence. Real OCaml's Random uses Lagged Fibonacci; ours is simpler but adequate for shuffles and Monte Carlo demos in baseline programs. Random.init 42; Random.int 100 = 48 Random.init 1; Random.int 10 = 0 --- lib/ocaml/runtime.sx | 24 ++++++++++++++++++++++++ lib/ocaml/test.sh | 16 ++++++++++++++++ plans/ocaml-on-sx.md | 7 +++++++ 3 files changed, 47 insertions(+) diff --git a/lib/ocaml/runtime.sx b/lib/ocaml/runtime.sx index 8f40331b..548a2f92 100644 --- a/lib/ocaml/runtime.sx +++ b/lib/ocaml/runtime.sx @@ -609,6 +609,30 @@ let asprintf fmt = Printf.sprintf fmt end ;; + module Random = struct + (* Linear-congruential PRNG. Deterministic for testing — same + seed reproduces the same sequence. Real OCaml's Random uses + Lagged Fibonacci; ours is simpler but adequate for shuffles + and Monte Carlo demos in baseline programs. *) + let _state = ref 1 + + let init s = _state := s + + let self_init () = _state := 1 + + let int bound = + _state := (!_state * 1103515245 + 12345) mod 2147483647; + Int.abs (!_state) mod bound + + let bool () = int 2 = 1 + + let float bound = + let n = int 1000000 in + float_of_int n /. 1000000.0 *. bound + + let bits () = int 1073741824 + end ;; + module Lazy = struct let force lz = _lazy_force lz end ;; diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index c17c1f92..b8c5500d 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1360,6 +1360,16 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 5103) (eval "(ocaml-run \"let t = Hashtbl.create 4 in Hashtbl.add t \\\"a\\\" 1; Hashtbl.clear t; Hashtbl.length t\")") +;; ── Random module (deterministic LCG) ───────────────────────── +(epoch 5110) +(eval "(ocaml-run \"Random.init 42; Random.int 100\")") +(epoch 5111) +(eval "(ocaml-run \"Random.init 42; let a = Random.int 100 in let b = Random.int 100 in let c = Random.int 100 in a + b + c\")") +(epoch 5112) +(eval "(ocaml-run \"Random.init 1; Random.int 10\")") +(epoch 5113) +(eval "(ocaml-run \"Random.init 7; Random.bool ()\")") + EPOCHS OUTPUT=$(timeout 360 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -2161,6 +2171,12 @@ check 5101 "Hashtbl.values sum" '12' check 5102 "Hashtbl.remove + length" '1' check 5103 "Hashtbl.clear + length" '0' +# ── Random (deterministic LCG) ────────────────────────────────── +check 5110 "Random.int 100 seed=42" '48' +check 5111 "Random.int x3 seed=42 sum" '152' +check 5112 "Random.int 10 seed=1" '0' +check 5113 "Random.bool seed=7" 'true' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index e95cc8dd..7d8df6d3 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 6 — Random module (LCG-based, deterministic) (+4 + tests, 549 total). Linear-congruential PRNG with mutable seed + (`_state` ref). API: init, self_init, int, bool, float, bits. + `int bound` returns `|state| mod bound` after stepping. Same seed + reproduces same sequence — useful for testing shuffles and Monte + Carlo demos. Real OCaml's Random uses Lagged Fibonacci; ours is + simpler but adequate for baseline programs. - 2026-05-09 Phase 6 — Hashtbl.keys / values / bindings / remove / clear / reset / to_seq / to_seq_keys / to_seq_values (+4 tests, 545 total). Two new host primitives `_hashtbl_remove` and From 64f4f10c32858290f736e2fbc81924ec6db61c4e Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 04:25:18 +0000 Subject: [PATCH 108/298] ocaml: phase 4 'fun (a, b) -> body' tuple-param destructuring (+4 tests, 553 total) parse-fun's collect-params now detects '(IDENT, ...)' as a tuple-pattern parameter (lookahead at peek-tok-at 1/2 distinguishes from '(x : T)' and '()' cases that try-consume-param! already handles). For each tuple param it: 1. parse-pattern to get the full pattern AST 2. generate a synthetic __pat_N name as the actual fun parameter 3. push (synth_name, pattern) onto tuple-binds After parsing the body, wraps it innermost-first with one 'match __pat_N with PAT -> ...' per tuple-param. The user-visible result is a (:fun (params...) body) where params are all simple names but the body destructures. Also retroactively simplifies Hashtbl.keys/values from 'fun pair -> match pair with (k, _) -> k' to plain 'fun (k, _) -> k', closing the iteration-99 workaround. (fun (a, b) -> a + b) (3, 7) = 10 List.map (fun (a, b) -> a * b) [(1, 2); (3, 4); (5, 6)] = [2; 12; 30] List.map (fun (k, _) -> k) [("a", 1); ("b", 2)] = ["a"; "b"] (fun a (b, c) d -> a + b + c + d) 1 (2, 3) 4 = 10 --- lib/ocaml/parser.sx | 50 +++++++++++++++++++++++++++++++++++++++----- lib/ocaml/runtime.sx | 9 ++------ lib/ocaml/test.sh | 16 ++++++++++++++ plans/ocaml-on-sx.md | 10 +++++++++ 4 files changed, 73 insertions(+), 12 deletions(-) diff --git a/lib/ocaml/parser.sx b/lib/ocaml/parser.sx index fd7db26a..b188298a 100644 --- a/lib/ocaml/parser.sx +++ b/lib/ocaml/parser.sx @@ -718,20 +718,60 @@ (fn () (let - ((params (list))) + ((params (list)) + (tuple-binds (list))) (begin (define collect-params (fn () - (let ((nm (try-consume-param!))) - (when (not (= nm nil)) - (begin (append! params nm) (collect-params)))))) + (cond + ;; `(IDENT, ...)` — tuple-pattern param. Generate a + ;; synthetic name and remember the pattern so we + ;; can wrap the body with a match. + ((and (at-op? "(") + (= (ocaml-tok-type (peek-tok-at 1)) "ident") + (or (= (ocaml-tok-value (peek-tok-at 2)) ",") + (= (ocaml-tok-value (peek-tok-at 2)) ")") + (= (ocaml-tok-value (peek-tok-at 2)) ":"))) + (cond + ;; (x : T) is a typed simple param — let the + ;; original try-consume-param! handle that. + ((= (ocaml-tok-value (peek-tok-at 2)) ":") + (let ((nm (try-consume-param!))) + (begin (append! params nm) (collect-params)))) + (else + (let ((pat (parse-pattern))) + (let ((nm (str "__pat_" (len tuple-binds)))) + (begin + (append! tuple-binds (list nm pat)) + (append! params nm) + (collect-params))))))) + (else + (let ((nm (try-consume-param!))) + (when (not (= nm nil)) + (begin (append! params nm) (collect-params)))))))) (collect-params) (when (= (len params) 0) (error "ocaml-parse: fun expects at least one parameter")) (consume! "op" "->") - (let ((body (parse-expr))) (list :fun params body)))))) + (let ((body (parse-expr))) + ;; Wrap body with `match __pat_N with PAT -> ...` for + ;; each tuple-param, innermost first. + (let ((wrapped body)) + (begin + (define wrap-binds + (fn (xs) + (when (not (= xs (list))) + (begin + (let ((nm (nth (first xs) 0)) + (pat (nth (first xs) 1))) + (set! wrapped + (list :match (list :var nm) + (list (list :case pat wrapped))))) + (wrap-binds (rest xs)))))) + (wrap-binds (reverse tuple-binds)) + (list :fun params wrapped)))))))) (define parse-let (fn () diff --git a/lib/ocaml/runtime.sx b/lib/ocaml/runtime.sx index 548a2f92..4fab4505 100644 --- a/lib/ocaml/runtime.sx +++ b/lib/ocaml/runtime.sx @@ -804,13 +804,8 @@ the keys/values projections that are commonly useful. *) let bindings t = _hashtbl_to_list t - let keys t = - List.map (fun pair -> match pair with (k, _) -> k) - (_hashtbl_to_list t) - - let values t = - List.map (fun pair -> match pair with (_, v) -> v) - (_hashtbl_to_list t) + let keys t = List.map (fun (k, _) -> k) (_hashtbl_to_list t) + let values t = List.map (fun (_, v) -> v) (_hashtbl_to_list t) let to_seq t = _hashtbl_to_list t let to_seq_keys = keys diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index b8c5500d..557461d0 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1370,6 +1370,16 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 5113) (eval "(ocaml-run \"Random.init 7; Random.bool ()\")") +;; ── fun (a, b) -> body — tuple param destructure ────────────── +(epoch 5120) +(eval "(ocaml-run \"(fun (a, b) -> a + b) (3, 7)\")") +(epoch 5121) +(eval "(ocaml-run \"List.map (fun (a, b) -> a * b) [(1, 2); (3, 4); (5, 6)]\")") +(epoch 5122) +(eval "(ocaml-run \"List.map (fun (k, _) -> k) [(\\\"a\\\", 1); (\\\"b\\\", 2)]\")") +(epoch 5123) +(eval "(ocaml-run \"(fun a (b, c) d -> a + b + c + d) 1 (2, 3) 4\")") + EPOCHS OUTPUT=$(timeout 360 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -2177,6 +2187,12 @@ check 5111 "Random.int x3 seed=42 sum" '152' check 5112 "Random.int 10 seed=1" '0' check 5113 "Random.bool seed=7" 'true' +# ── fun (a, b) -> body tuple param ────────────────────────────── +check 5120 "fun (a, b) -> a + b" '10' +check 5121 "List.map fun (a, b)" '(2 12 30)' +check 5122 "List.map fun (k, _)" '("a" "b")' +check 5123 "fun a (b, c) d mixed" '10' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 7d8df6d3..445d72ca 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,16 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 4 — `fun (a, b) -> body` tuple-param destructuring + (+4 tests, 553 total). parse-fun's collect-params now detects + `(IDENT, ...)` (lookahead at peek-tok-at 1/2 to distinguish from + `(x : T)` and `()` cases), generates a synthetic `__pat_N` name as + the actual fun param, and remembers the pattern in tuple-binds. + After parsing the body, wraps it innermost-first with one + `match __pat_N with PAT -> ...` per tuple-param. Also retroactively + simplifies `Hashtbl.keys`/`values` from + `fun pair -> match pair with (k, _) -> k` to plain + `fun (k, _) -> k`. - 2026-05-09 Phase 6 — Random module (LCG-based, deterministic) (+4 tests, 549 total). Linear-congruential PRNG with mutable seed (`_state` ref). API: init, self_init, int, bool, float, bits. From b526d81a4c6d3138aca5760c184e72721ed7867e Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 04:36:33 +0000 Subject: [PATCH 109/298] ocaml: phase 4 'let f (a, b) = body' tuple-param on inner-let (+3 tests, 556 total) Mirrors iteration 101's parse-fun change inside parse-let's parse-one!: - same '(IDENT, ...)' detection on collect-params - same __pat_N synth name for the function param - same innermost-first match-wrapping Difference: for inner-let the wrapping is applied to the rhs of the let-binding (which is the function value), not directly to a fun body. let f (a, b) = a + b in f (3, 7) = 10 let g x (a, b) = x + a + b in g 1 (2, 3) = 6 let h (a, b) (c, d) = a * b + c * d in h (1, 2) (3, 4) = 14 --- lib/ocaml/parser.sx | 46 +++++++++++++++++++++++++++++++++++++++----- lib/ocaml/test.sh | 13 +++++++++++++ plans/ocaml-on-sx.md | 8 ++++++++ 3 files changed, 62 insertions(+), 5 deletions(-) diff --git a/lib/ocaml/parser.sx b/lib/ocaml/parser.sx index b188298a..0b539910 100644 --- a/lib/ocaml/parser.sx +++ b/lib/ocaml/parser.sx @@ -826,13 +826,33 @@ (define parse-one! (fn () (let ((nm (ocaml-tok-value (consume! "ident" nil))) - (ps (list))) + (ps (list)) + (tuple-binds (list))) (begin (define collect-params (fn () - (let ((p (try-consume-param!))) - (when (not (= p nil)) - (begin (append! ps p) (collect-params)))))) + (cond + ;; `(IDENT, ...)` — tuple param. + ((and (at-op? "(") + (= (ocaml-tok-type (peek-tok-at 1)) "ident") + (or (= (ocaml-tok-value (peek-tok-at 2)) ",") + (= (ocaml-tok-value (peek-tok-at 2)) ")"))) + (cond + ((= (ocaml-tok-value (peek-tok-at 2)) ":") + (let ((p (try-consume-param!))) + (when (not (= p nil)) + (begin (append! ps p) (collect-params))))) + (else + (let ((pat (parse-pattern))) + (let ((sn (str "__pat_" (len tuple-binds)))) + (begin + (append! tuple-binds (list sn pat)) + (append! ps sn) + (collect-params))))))) + (else + (let ((p (try-consume-param!))) + (when (not (= p nil)) + (begin (append! ps p) (collect-params)))))))) (collect-params) ;; Optional type annotation: skip `: TYPE` before `=`. (when (at-op? ":") @@ -848,7 +868,23 @@ (skip-tann))) (consume! "op" "=") (let ((rhs (parse-expr))) - (append! bindings (list nm ps rhs))))))) + (begin + ;; Wrap rhs with `match __pat_N with PAT -> ...` + ;; for each tuple-param, innermost first. + (let ((wrapped rhs)) + (begin + (define wrap-binds + (fn (xs) + (when (not (= xs (list))) + (begin + (let ((sn (nth (first xs) 0)) + (pat (nth (first xs) 1))) + (set! wrapped + (list :match (list :var sn) + (list (list :case pat wrapped))))) + (wrap-binds (rest xs)))))) + (wrap-binds (reverse tuple-binds)) + (append! bindings (list nm ps wrapped)))))))))) (parse-one!) (define more (fn () diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 557461d0..0932e6e0 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1380,6 +1380,14 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 5123) (eval "(ocaml-run \"(fun a (b, c) d -> a + b + c + d) 1 (2, 3) 4\")") +;; ── let f (a, b) = body — tuple param on inner-let ──────────── +(epoch 5130) +(eval "(ocaml-run \"let f (a, b) = a + b in f (3, 7)\")") +(epoch 5131) +(eval "(ocaml-run \"let g x (a, b) = x + a + b in g 1 (2, 3)\")") +(epoch 5132) +(eval "(ocaml-run \"let h (a, b) (c, d) = a * b + c * d in h (1, 2) (3, 4)\")") + EPOCHS OUTPUT=$(timeout 360 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -2193,6 +2201,11 @@ check 5121 "List.map fun (a, b)" '(2 12 30)' check 5122 "List.map fun (k, _)" '("a" "b")' check 5123 "fun a (b, c) d mixed" '10' +# ── let f (a, b) = body — let with tuple param ────────────────── +check 5130 "let f (a, b) = a + b" '10' +check 5131 "let g x (a, b) mixed" '6' +check 5132 "let h (a, b) (c, d) curried" '14' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 445d72ca..3374b968 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,14 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 4 — `let f (a, b) = body in body2` tuple-param on + inner-let bindings (+3 tests, 556 total). Mirrors iteration 101's + parse-fun change inside parse-let's parse-one!: same `(IDENT, ...)` + detection, same `__pat_N` synth name, same innermost-first match + wrapping — but applied to the rhs of the let-binding (which is the + function value). Lets us write `let f (a, b) = a + b in f (3, 7)`, + `let g x (a, b) = x + a + b in g 1 (2, 3)`, and `let h (a, b) + (c, d) = a * b + c * d in h (1, 2) (3, 4)`. - 2026-05-09 Phase 4 — `fun (a, b) -> body` tuple-param destructuring (+4 tests, 553 total). parse-fun's collect-params now detects `(IDENT, ...)` (lookahead at peek-tok-at 1/2 to distinguish from From 82ffc695a58df1d16faad4c28a4d668f921a43cf Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 04:51:11 +0000 Subject: [PATCH 110/298] ocaml: phase 4 top-level 'let f (a, b) = body' tuple-param decl (+3 tests, 559 total) parse-decl-let lives in the outer ocaml-parse-program scope and does not have access to parse-pattern (which is local to ocaml-parse). Source-slicing approach instead: 1. detect '(IDENT, ...)' in collect-params 2. scan tokens to the matching ')' (tracking nested parens) 3. slice the pattern source string from src 4. push (synth_name, pat_src) onto tuple-srcs Then after collecting params, the rhs source string gets wrapped with 'match SN with PAT_SRC -> (RHS_SRC)' for each tuple-param, innermost-first, and the final string is fed through ocaml-parse. End result is the same AST shape as the iteration-102 inner-let case: a function whose body destructures a synthetic name. let f (a, b) = a + b ;; f (3, 7) = 10 let g x (a, b) = x + a + b ;; g 1 (2, 3) = 6 let h (a, b) (c, d) = a * b + c * d ;; h (1, 2) (3, 4) = 14 --- lib/ocaml/parser.sx | 58 +++++++++++++++++++++++++++++++++++++++++--- lib/ocaml/test.sh | 13 ++++++++++ plans/ocaml-on-sx.md | 10 ++++++++ 3 files changed, 78 insertions(+), 3 deletions(-) diff --git a/lib/ocaml/parser.sx b/lib/ocaml/parser.sx index 0b539910..459e4fc2 100644 --- a/lib/ocaml/parser.sx +++ b/lib/ocaml/parser.sx @@ -1288,7 +1288,8 @@ (str "__unit_" idx))) (else (ocaml-tok-value (consume! "ident" nil))))) - (ps (list))) + (ps (list)) + (tuple-srcs (list))) (begin (define collect-params (fn () @@ -1307,6 +1308,39 @@ (advance-tok!) (advance-tok!) (append! ps (str "__unit_" idx)) (collect-params))) + ;; `(IDENT, ...)` — tuple pattern. Slice the + ;; source from `(` through matching `)` and + ;; remember it; substitute synth-name. + ((and (at-op? "(") + (< (+ idx 2) tok-len) + (= (ocaml-tok-type (nth tokens (+ idx 1))) + "ident") + (= (ocaml-tok-value (nth tokens (+ idx 2))) + ",")) + (let ((pat-start (cur-pos)) (depth 1)) + (begin + (advance-tok!) ;; consume `(` + (define skip + (fn () + (cond + ((>= idx tok-len) nil) + ((at-op? "(") + (begin (set! depth (+ depth 1)) + (advance-tok!) (skip))) + ((at-op? ")") + (cond + ((= depth 1) (advance-tok!)) + (else (begin + (set! depth (- depth 1)) + (advance-tok!) (skip))))) + (else (begin (advance-tok!) (skip)))))) + (skip) + (let ((sn (str "__pat_" (len tuple-srcs))) + (pat-src (slice src pat-start (cur-pos)))) + (begin + (append! tuple-srcs (list sn pat-src)) + (append! ps sn) + (collect-params)))))) (else nil)))) (collect-params) ;; Optional type annotation: skip `: TYPE` before `=`. @@ -1326,8 +1360,26 @@ (begin (skip-let-rhs-boundary!) (let ((expr-src (slice src expr-start (cur-pos)))) - (let ((expr (ocaml-parse expr-src))) - (append! bindings (list nm ps expr)))))))))) + (begin + ;; Wrap rhs src with `match __pat_N with PAT + ;; -> ...` for each tuple-param, innermost + ;; first, then re-parse. + (let ((wrapped expr-src)) + (begin + (define wrap-binds + (fn (xs) + (when (not (= xs (list))) + (begin + (let ((sn (nth (first xs) 0)) + (pat-src (nth (first xs) 1))) + (set! wrapped + (str "match " sn " with " + pat-src " -> (" + wrapped ")"))) + (wrap-binds (rest xs)))))) + (wrap-binds (reverse tuple-srcs)) + (let ((expr (ocaml-parse wrapped))) + (append! bindings (list nm ps expr))))))))))))) (parse-one!) (define more (fn () diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 0932e6e0..2834ef4c 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1388,6 +1388,14 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 5132) (eval "(ocaml-run \"let h (a, b) (c, d) = a * b + c * d in h (1, 2) (3, 4)\")") +;; ── top-level let f (a, b) = body — tuple param decl ────────── +(epoch 5140) +(eval "(ocaml-run-program \"let f (a, b) = a + b ;; f (3, 7)\")") +(epoch 5141) +(eval "(ocaml-run-program \"let g x (a, b) = x + a + b ;; g 1 (2, 3)\")") +(epoch 5142) +(eval "(ocaml-run-program \"let h (a, b) (c, d) = a * b + c * d ;; h (1, 2) (3, 4)\")") + EPOCHS OUTPUT=$(timeout 360 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -2206,6 +2214,11 @@ check 5130 "let f (a, b) = a + b" '10' check 5131 "let g x (a, b) mixed" '6' check 5132 "let h (a, b) (c, d) curried" '14' +# ── top-level let f (a, b) = body — tuple param decl ──────────── +check 5140 "top let f (a, b) = a+b" '10' +check 5141 "top let g x (a, b) mixed" '6' +check 5142 "top let h (a, b) (c, d)" '14' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 3374b968..3ee58e54 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,16 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 4 — top-level `let f (a, b) = body` tuple-param + decl (+3 tests, 559 total). parse-decl-let (which lives outside + the ocaml-parse scope and lacks parse-pattern access) uses a + source-slicing approach: detect `(IDENT, ...)`, scan tokens to + matching `)`, slice the pattern source string, store as + (synth_name, pat_src). After collecting params, wrap the rhs + source string with `match SN with PAT_SRC -> (RHS_SRC)` for each + tuple-param, innermost first, then ocaml-parse the wrapped + string. End result is the same shape as the inner-let case: a + function whose body destructures a synthetic name. - 2026-05-09 Phase 4 — `let f (a, b) = body in body2` tuple-param on inner-let bindings (+3 tests, 556 total). Mirrors iteration 101's parse-fun change inside parse-let's parse-one!: same `(IDENT, ...)` From 7c405065711f1eda1476a73cf5e7e38e62979375 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 05:00:50 +0000 Subject: [PATCH 111/298] ocaml: phase 5.1 merge_sort.ml baseline (user mergesort, sum=44) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit User-implemented mergesort that exercises features added across the last few iterations: let rec split lst = match lst with | x :: y :: rest -> let (a, b) = split rest in (* iter 98 let-tuple destruct *) (x :: a, y :: b) | ... let rec merge xs ys = match xs with | x :: xs' -> match ys with (* nested match-in-match *) | y :: ys' -> ... ... List.fold_left (+) 0 (sort [...]) (* iter 89 (op) section *) Sum of [3;1;4;1;5;9;2;6;5;3;5] = 44 regardless of order, so the result is also a smoke test of the implementation correctness — if merge_sort drops or duplicates an element the sum diverges. 26 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/merge_sort.ml | 28 ++++++++++++++++++++++++++++ plans/ocaml-on-sx.md | 6 ++++++ 3 files changed, 35 insertions(+) create mode 100644 lib/ocaml/baseline/merge_sort.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 904c4111..b2a4af71 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -16,6 +16,7 @@ "lambda_calc.ml": 7, "levenshtein.ml": 11, "memo_fib.ml": 75025, + "merge_sort.ml": 44, "module_use.ml": 3, "mutable_record.ml": 10, "option_match.ml": 5, diff --git a/lib/ocaml/baseline/merge_sort.ml b/lib/ocaml/baseline/merge_sort.ml new file mode 100644 index 00000000..cfcc30d6 --- /dev/null +++ b/lib/ocaml/baseline/merge_sort.ml @@ -0,0 +1,28 @@ +let rec split lst = + match lst with + | [] -> ([], []) + | [x] -> ([x], []) + | x :: y :: rest -> + let (a, b) = split rest in + (x :: a, y :: b) + +let rec merge xs ys = + match xs with + | [] -> ys + | x :: xs' -> + match ys with + | [] -> xs + | y :: ys' -> + if x <= y then x :: merge xs' (y :: ys') + else y :: merge (x :: xs') ys' + +let rec sort lst = + match lst with + | [] -> [] + | [x] -> [x] + | _ -> + let (a, b) = split lst in + merge (sort a) (sort b) +;; + +List.fold_left (+) 0 (sort [3;1;4;1;5;9;2;6;5;3;5]) diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 3ee58e54..17e0c98c 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,12 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — merge_sort.ml baseline (user-implemented + mergesort, sorted sum = 44). Stress-tests `let (a, b) = split rest + in (x :: a, y :: b)` (let-tuple destructuring inside a recursive + match arm), nested match-in-match for the merge merge step, and + the (op) operator section `(+)` as fold accumulator. 26 baseline + programs total. - 2026-05-09 Phase 4 — top-level `let f (a, b) = body` tuple-param decl (+3 tests, 559 total). parse-decl-let (which lives outside the ocaml-parse scope and lacks parse-pattern access) uses a From 7773c40337cddcc97d74c2abbeb58a7eb4324991 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 05:12:34 +0000 Subject: [PATCH 112/298] ocaml: phase 4 basic labeled / optional argument syntax (label dropped) (+3 tests, 562 total) Three parser changes: 1. at-app-start? returns true on op '~' or '?' so the app loop keeps consuming labeled args. 2. The app arg parser handles: ~name:VAL drop label, parse VAL as the arg ?name:VAL same ~name punning -- treat as (:var name) ?name same 3. try-consume-param! drops '~' or '?' and treats the following ident as a regular positional param name. Caveats: - Order in the call must match definition order; we don't reorder by label name. - Optional args don't auto-wrap in Some, so the function body sees the raw value for ?x:V. Lets us write idiomatic-looking OCaml even though the runtime is positional underneath: let f ~x ~y = x + y in f ~x:3 ~y:7 = 10 let x = 4 in let y = 5 in f ~x ~y = 20 (punning) let f ?x ~y = x + y in f ?x:1 ~y:2 = 3 --- lib/ocaml/parser.sx | 26 ++++++++++++++++++++++++++ lib/ocaml/test.sh | 13 +++++++++++++ plans/ocaml-on-sx.md | 14 ++++++++++++++ 3 files changed, 53 insertions(+) diff --git a/lib/ocaml/parser.sx b/lib/ocaml/parser.sx index 459e4fc2..bd2b77d8 100644 --- a/lib/ocaml/parser.sx +++ b/lib/ocaml/parser.sx @@ -353,6 +353,15 @@ (advance-tok!) (set-nth! wild-counter 0 (+ (nth wild-counter 0) 1)) (str "__wild_" (nth wild-counter 0)))) + ;; ~name / ?name labeled/optional param — drop the label + ;; prefix and treat as a positional ident name. Optional + ;; default `?(name = default)` is not handled here yet. + ((and (or (at-op? "~") (at-op? "?")) + (= (ocaml-tok-type (peek-tok-at 1)) "ident")) + (begin + (advance-tok!) ;; ~ or ? + (let ((nm (ocaml-tok-value (peek-tok)))) + (begin (advance-tok!) nm)))) ((check-tok? "ident" nil) (let ((nm (ocaml-tok-value (peek-tok)))) (begin (advance-tok!) nm))) @@ -565,6 +574,9 @@ ((and (= tt "keyword") (or (= tv "true") (= tv "false") (= tv "begin"))) true) ((and (= tt "op") (or (= tv "(") (= tv "[") (= tv "{") (= tv "!"))) true) + ;; ~name or ?name (labeled/optional arg) — drop the + ;; label prefix and use the value as a positional arg. + ((and (= tt "op") (or (= tv "~") (= tv "?"))) true) (else false))))) (define parse-atom-postfix (fn () @@ -637,6 +649,20 @@ ((at-op? "!") (begin (advance-tok!) (list :deref (parse-atom-postfix)))) + ;; Labeled/optional arg ~name[:VAL] or ?name[:VAL] + ;; — drop the label prefix. With colon, parse VAL + ;; as the arg. Without, the variable named `name` + ;; (punning) becomes the arg. + ((or (at-op? "~") (at-op? "?")) + (begin + (advance-tok!) + (let ((nm (ocaml-tok-value + (consume! "ident" nil)))) + (cond + ((at-op? ":") + (begin (advance-tok!) + (parse-atom-postfix))) + (else (list :var nm)))))) (else (parse-atom-postfix))))) (begin (set! head (list :app head arg)) (loop)))))) (loop) diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 2834ef4c..8723d255 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1396,6 +1396,14 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 5142) (eval "(ocaml-run-program \"let h (a, b) (c, d) = a * b + c * d ;; h (1, 2) (3, 4)\")") +;; ── Labeled / optional args (label dropped) ────────────────── +(epoch 5150) +(eval "(ocaml-run \"let f ~x ~y = x + y in f ~x:3 ~y:7\")") +(epoch 5151) +(eval "(ocaml-run \"let f ~x ~y = x * y in let x = 4 in let y = 5 in f ~x ~y\")") +(epoch 5152) +(eval "(ocaml-run \"let f ?x ~y = x + y in f ?x:1 ~y:2\")") + EPOCHS OUTPUT=$(timeout 360 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -2219,6 +2227,11 @@ check 5140 "top let f (a, b) = a+b" '10' check 5141 "top let g x (a, b) mixed" '6' check 5142 "top let h (a, b) (c, d)" '14' +# ── Labeled / optional args (label dropped, positional) ───────── +check 5150 "f ~x:3 ~y:7 sum" '10' +check 5151 "f ~x ~y punning" '20' +check 5152 "f ?x:1 ~y:2 (no Some wrap)" '3' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 17e0c98c..7a4264be 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,20 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 4 — basic labeled / optional argument syntax + (label dropped, positional semantics) (+3 tests, 562 total). Three + parser changes: + (1) `at-app-start?` returns true on op `~` or `?` so the app loop + keeps consuming labeled args; + (2) the app arg parser handles `~name:VAL` (drop label, parse VAL), + `?name:VAL` (same), and `~name` punning (treat as `(:var name)`); + (3) `try-consume-param!` drops `~` / `?` and treats the following + ident as a regular positional param name. + Order in the call must match definition order — we don't reorder + args by label name. Optional args don't auto-wrap in Some, so the + function body sees the raw value for `?x:V`. Lets us write + `let f ~x ~y = x + y in f ~x:3 ~y:7` and `let x = 4 in let y = 5 + in f ~x ~y` (punning). - 2026-05-09 Phase 5.1 — merge_sort.ml baseline (user-implemented mergesort, sorted sum = 44). Stress-tests `let (a, b) = split rest in (x :: a, y :: b)` (let-tuple destructuring inside a recursive From 5d33f8f20b513038081a4b3fa4b02057fa8f460c Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 05:24:37 +0000 Subject: [PATCH 113/298] ocaml: phase 6 Filename module + Char.compare/equal/escaped (+7 tests, 569 total) Filename module (forward-slash only, no Windows-separator detection): basename '/foo/bar/baz.ml' = 'baz.ml' dirname '/foo/bar/baz.ml' = '/foo/bar' extension 'baz.tar.gz' = '.gz' chop_extension 'hello.ml' = 'hello' concat 'a' 'b' = 'a/b' is_relative 'a/b' = true current_dir_name = '.', parent_dir_name = '..', dir_sep = '/' Char additions: equal a b = (a = b) compare a b = code(a) - code(b) escaped '\n' = '\\n' (likewise t, r, \\, ") --- lib/ocaml/runtime.sx | 63 ++++++++++++++++++++++++++++++++++++++++++++ lib/ocaml/test.sh | 29 ++++++++++++++++++++ plans/ocaml-on-sx.md | 6 +++++ 3 files changed, 98 insertions(+) diff --git a/lib/ocaml/runtime.sx b/lib/ocaml/runtime.sx index 4fab4505..62eee777 100644 --- a/lib/ocaml/runtime.sx +++ b/lib/ocaml/runtime.sx @@ -474,6 +474,69 @@ let is_alnum c = is_alpha c || is_digit c let is_whitespace c = c = \" \" || c = \"\\t\" || c = \"\\n\" || c = \"\\r\" + let equal a b = a = b + let compare a b = _char_code a - _char_code b + let escaped c = + if c = \"\\n\" then \"\\\\n\" + else if c = \"\\t\" then \"\\\\t\" + else if c = \"\\r\" then \"\\\\r\" + else if c = \"\\\\\" then \"\\\\\\\\\" + else if c = \"\\\"\" then \"\\\\\\\"\" + else c + end ;; + + module Filename = struct + (* Minimal Filename: basename / dirname / extension / concat / + chop_suffix. Forward-slash only — doesn't try to detect + Windows-style separators. *) + let _last_slash s = + let n = _string_length s in + let rec aux i = + if i < 0 then -1 + else if _string_get s i = \"/\" then i + else aux (i - 1) + in + aux (n - 1) + + let basename s = + let i = _last_slash s in + if i < 0 then s + else _string_sub s (i + 1) (_string_length s - i - 1) + + let dirname s = + let i = _last_slash s in + if i < 0 then \".\" + else if i = 0 then \"/\" + else _string_sub s 0 i + + let extension s = + let b = basename s in + let n = _string_length b in + let rec aux i = + if i < 0 then \"\" + else if _string_get b i = \".\" then + _string_sub b i (n - i) + else aux (i - 1) + in + aux (n - 1) + + let chop_extension s = + let ext = extension s in + let nx = _string_length ext in + if nx = 0 then s + else _string_sub s 0 (_string_length s - nx) + + let concat a b = + if _string_length a = 0 then b + else if _string_get a (_string_length a - 1) = \"/\" then a ^ b + else a ^ \"/\" ^ b + + let is_relative s = + _string_length s = 0 || _string_get s 0 <> \"/\" + + let current_dir_name = \".\" + let parent_dir_name = \"..\" + let dir_sep = \"/\" end ;; module Int = struct diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 8723d255..2baaaa56 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1404,6 +1404,24 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 5152) (eval "(ocaml-run \"let f ?x ~y = x + y in f ?x:1 ~y:2\")") +;; ── Filename module ────────────────────────────────────────── +(epoch 5160) +(eval "(ocaml-run \"Filename.basename \\\"/foo/bar/baz.ml\\\"\")") +(epoch 5161) +(eval "(ocaml-run \"Filename.dirname \\\"/foo/bar/baz.ml\\\"\")") +(epoch 5162) +(eval "(ocaml-run \"Filename.extension \\\"baz.tar.gz\\\"\")") +(epoch 5163) +(eval "(ocaml-run \"Filename.concat \\\"a\\\" \\\"b\\\"\")") +(epoch 5164) +(eval "(ocaml-run \"Filename.chop_extension \\\"hello.ml\\\"\")") + +;; ── Char.compare / equal / escaped ───────────────────────── +(epoch 5170) +(eval "(ocaml-run \"Char.compare \\\"b\\\" \\\"a\\\"\")") +(epoch 5171) +(eval "(ocaml-run \"Char.equal \\\"a\\\" \\\"a\\\"\")") + EPOCHS OUTPUT=$(timeout 360 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -2232,6 +2250,17 @@ check 5150 "f ~x:3 ~y:7 sum" '10' check 5151 "f ~x ~y punning" '20' check 5152 "f ?x:1 ~y:2 (no Some wrap)" '3' +# ── Filename module ───────────────────────────────────────────── +check 5160 "basename /foo/bar/baz.ml" '"baz.ml"' +check 5161 "dirname /foo/bar/baz.ml" '"/foo/bar"' +check 5162 "extension baz.tar.gz" '".gz"' +check 5163 "concat a b" '"a/b"' +check 5164 "chop_extension hello.ml" '"hello"' + +# ── Char.compare / equal ──────────────────────────────────────── +check 5170 "Char.compare b a" '1' +check 5171 "Char.equal a a" 'true' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 7a4264be..dd2dd8de 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,12 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 6 — Filename module + Char.compare/equal/escaped + (+7 tests, 569 total). Filename: basename, dirname, extension, + chop_extension, concat, is_relative + dir_sep / current_dir_name / + parent_dir_name constants. Forward-slash only, doesn't try to + detect Windows separators. Char additions: equal, compare (via + code subtraction), escaped (handles `\n`/`\t`/`\r`/`\\`/`\"`). - 2026-05-09 Phase 4 — basic labeled / optional argument syntax (label dropped, positional semantics) (+3 tests, 562 total). Three parser changes: From ec12b721e8186649b1578b8869590d7997b1662a Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 05:35:19 +0000 Subject: [PATCH 114/298] ocaml: phase 4 Set.Make / Map.Make functor application smoke tests (+3 tests, 572 total) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Functors were already wired through ocaml-make-functor in eval.sx (curried host closure consuming module dicts) but had no explicit tests for the user-defined Ord application path. This commit adds three smoke tests that confirm: module IntOrd = struct let compare a b = a - b end module S = Set.Make (IntOrd) S.elements (fold-add [5;1;3;1;5]) sums to 9 (dedupe + sort) S.mem 2 (S.add 1 (S.add 2 (S.add 3 S.empty))) = true M.cardinal (M.add 1 'a' (M.add 2 'b' M.empty)) = 2 The Ord parameter is properly threaded through the functor body — elements are sorted in compare order and dedupe works. --- lib/ocaml/test.sh | 13 +++++++++++++ plans/ocaml-on-sx.md | 7 +++++++ 2 files changed, 20 insertions(+) diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 2baaaa56..9135631b 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1422,6 +1422,14 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 5171) (eval "(ocaml-run \"Char.equal \\\"a\\\" \\\"a\\\"\")") +;; ── Set.Make / Map.Make functor application ────────────────── +(epoch 5180) +(eval "(ocaml-run-program \"module IntOrd = struct let compare a b = a - b end ;; module S = Set.Make (IntOrd) ;; let s = List.fold_left (fun s x -> S.add x s) S.empty [5;1;3;1;5] ;; List.fold_left (+) 0 (S.elements s)\")") +(epoch 5181) +(eval "(ocaml-run-program \"module IntOrd = struct let compare a b = a - b end ;; module S = Set.Make (IntOrd) ;; let s = S.add 1 (S.add 2 (S.add 3 S.empty)) ;; S.mem 2 s\")") +(epoch 5182) +(eval "(ocaml-run-program \"module IntOrd = struct let compare a b = a - b end ;; module M = Map.Make (IntOrd) ;; let m = M.add 1 \\\"a\\\" (M.add 2 \\\"b\\\" M.empty) ;; M.cardinal m\")") + EPOCHS OUTPUT=$(timeout 360 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -2261,6 +2269,11 @@ check 5164 "chop_extension hello.ml" '"hello"' check 5170 "Char.compare b a" '1' check 5171 "Char.equal a a" 'true' +# ── Set.Make / Map.Make functor application ───────────────────── +check 5180 "Set.Make dedupe sum" '9' +check 5181 "Set.Make mem" 'true' +check 5182 "Map.Make cardinal" '2' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index dd2dd8de..bce487b0 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 4 — Set.Make / Map.Make functor application + smoke tests (+3 tests, 572 total). Functors were already wired + through ocaml-make-functor in eval.sx but had no explicit tests + for the user-defined Ord application path. Confirms that + `module S = Set.Make (IntOrd) ;; let s = ... in S.elements s`, + `S.mem 2 s`, and `Map.Make (IntOrd) ;; M.cardinal m` all work end + to end. - 2026-05-09 Phase 6 — Filename module + Char.compare/equal/escaped (+7 tests, 569 total). Filename: basename, dirname, extension, chop_extension, concat, is_relative + dir_sep / current_dir_name / From 24416f8cef3be78b470f518ab31d76331e96bb61 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 05:44:35 +0000 Subject: [PATCH 115/298] ocaml: phase 5.1 unique_set.ml baseline (Set.Make + IntOrd, count = 9) First baseline that exercises the functor pipeline end to end: module IntOrd = struct type t = int let compare a b = a - b end module IntSet = Set.Make (IntOrd) let unique_count xs = let s = List.fold_left (fun s x -> IntSet.add x s) IntSet.empty xs in IntSet.cardinal s Counts unique elements in [3;1;4;1;5;9;2;6;5;3;5;8;9;7;9]: {1,2,3,4,5,6,7,8,9} -> 9 The input has 15 elements with 9 unique values. The 'type t = int' declaration in IntOrd is required by real OCaml; OCaml-on-SX is dynamic and would accept it without, but we include it for source fidelity. 27 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/unique_set.ml | 14 ++++++++++++++ plans/ocaml-on-sx.md | 6 ++++++ 3 files changed, 21 insertions(+) create mode 100644 lib/ocaml/baseline/unique_set.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index b2a4af71..6f5186e2 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -26,5 +26,6 @@ "roman.ml": 44, "sieve.ml": 15, "sum_squares.ml": 385, + "unique_set.ml": 9, "word_count.ml": 3 } diff --git a/lib/ocaml/baseline/unique_set.ml b/lib/ocaml/baseline/unique_set.ml new file mode 100644 index 00000000..3a3ba483 --- /dev/null +++ b/lib/ocaml/baseline/unique_set.ml @@ -0,0 +1,14 @@ +module IntOrd = struct + type t = int + let compare a b = a - b +end + +module IntSet = Set.Make (IntOrd) + +let unique_count xs = + let s = List.fold_left (fun s x -> IntSet.add x s) IntSet.empty xs in + IntSet.cardinal s + +;; + +unique_count [3; 1; 4; 1; 5; 9; 2; 6; 5; 3; 5; 8; 9; 7; 9] diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index bce487b0..0c9fe76f 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,12 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — unique_set.ml baseline (Set.Make + IntOrd + functor app, count uniques in [3;1;4;1;5;9;2;6;5;3;5;8;9;7;9] → + 9). First baseline that exercises the functor pipeline end to + end: defines an Ord module with `type t = int` + `compare`, applies + Set.Make to it, then folds the input list adding each element to + the set and queries `IntSet.cardinal`. 27 baseline programs total. - 2026-05-09 Phase 4 — Set.Make / Map.Make functor application smoke tests (+3 tests, 572 total). Functors were already wired through ocaml-make-functor in eval.sx but had no explicit tests From 2d519461c435cb016fc31a70850d14ace64c2a13 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 05:56:10 +0000 Subject: [PATCH 116/298] ocaml: phase 6 Seq module (eager, list-backed) (+4 tests, 576 total) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Real OCaml's Seq.t is 'unit -> Cons of elt * Seq.t | Nil' — a lazy thunk that lets you build infinite sequences. Ours is just a list, which gives the right shape for everything in baseline programs that don't rely on laziness (taking from infinite sequences would force memory). API: empty, cons, return, is_empty, iter, iteri, map, filter, filter_map, fold_left, length, take, drop, append, to_list, of_list, init, unfold. unfold takes a step fn 'acc -> Option (elt * acc)' and threads through until it returns None: Seq.fold_left (+) 0 (Seq.unfold (fun n -> if n > 4 then None else Some (n, n + 1)) 1) = 1 + 2 + 3 + 4 = 10 --- lib/ocaml/runtime.sx | 70 ++++++++++++++++++++++++++++++++++++++++++++ lib/ocaml/test.sh | 16 ++++++++++ plans/ocaml-on-sx.md | 10 +++++++ 3 files changed, 96 insertions(+) diff --git a/lib/ocaml/runtime.sx b/lib/ocaml/runtime.sx index 62eee777..c5eb84d0 100644 --- a/lib/ocaml/runtime.sx +++ b/lib/ocaml/runtime.sx @@ -696,6 +696,76 @@ let bits () = int 1073741824 end ;; + module Seq = struct + (* Eager list-backed Seq — no laziness. Real OCaml's Seq is a + thunk producing Cons / Nil; ours is just a list. Adequate for + most baseline programs that don't rely on infinite sequences. *) + let empty = [] + let cons x s = x :: s + let return x = [x] + let is_empty s = match s with [] -> true | _ -> false + + let rec iter f s = + match s with + | [] -> () + | h :: t -> f h; iter f t + + let rec iteri f s = + let rec go i xs = + match xs with + | [] -> () + | h :: t -> f i h; go (i + 1) t + in + go 0 s + + let rec map f s = match s with [] -> [] | h :: t -> f h :: map f t + + let rec filter p s = + match s with + | [] -> [] + | h :: t -> if p h then h :: filter p t else filter p t + + let rec filter_map f s = + match s with + | [] -> [] + | h :: t -> + match f h with + | Some v -> v :: filter_map f t + | None -> filter_map f t + + let rec fold_left f init s = + match s with + | [] -> init + | h :: t -> fold_left f (f init h) t + + let rec length s = + match s with [] -> 0 | _ :: t -> 1 + length t + + let rec take n s = + if n <= 0 then [] + else match s with [] -> [] | h :: t -> h :: take (n - 1) t + + let rec drop n s = + if n <= 0 then s + else match s with [] -> [] | _ :: t -> drop (n - 1) t + + let rec append a b = + match a with [] -> b | h :: t -> h :: append t b + + let to_list s = s + let of_list xs = xs + + let rec init n f = + if n = 0 then [] else + let rec build i = if i = n then [] else f i :: build (i + 1) in + build 0 + + let rec unfold f acc = + match f acc with + | None -> [] + | Some (x, acc') -> x :: unfold f acc' + end ;; + module Lazy = struct let force lz = _lazy_force lz end ;; diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 9135631b..58868059 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1430,6 +1430,16 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 5182) (eval "(ocaml-run-program \"module IntOrd = struct let compare a b = a - b end ;; module M = Map.Make (IntOrd) ;; let m = M.add 1 \\\"a\\\" (M.add 2 \\\"b\\\" M.empty) ;; M.cardinal m\")") +;; ── Seq module (eager list-backed) ──────────────────────────── +(epoch 5190) +(eval "(ocaml-run \"Seq.fold_left (+) 0 (Seq.map (fun x -> x * x) (Seq.of_list [1;2;3;4]))\")") +(epoch 5191) +(eval "(ocaml-run \"Seq.length (Seq.filter (fun x -> x mod 2 = 0) (Seq.of_list [1;2;3;4;5;6;7;8;9;10]))\")") +(epoch 5192) +(eval "(ocaml-run \"Seq.fold_left (+) 0 (Seq.init 5 (fun i -> i * 2))\")") +(epoch 5193) +(eval "(ocaml-run \"Seq.fold_left (+) 0 (Seq.unfold (fun n -> if n > 4 then None else Some (n, n + 1)) 1)\")") + EPOCHS OUTPUT=$(timeout 360 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -2274,6 +2284,12 @@ check 5180 "Set.Make dedupe sum" '9' check 5181 "Set.Make mem" 'true' check 5182 "Map.Make cardinal" '2' +# ── Seq module (eager list-backed) ────────────────────────────── +check 5190 "Seq fold map squares" '30' +check 5191 "Seq filter evens" '5' +check 5192 "Seq init 5 i*2" '20' +check 5193 "Seq unfold 1..4 sum" '10' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 0c9fe76f..212bfbb7 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,16 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 6 — Seq module (eager, list-backed) (+4 tests, + 576 total). Real OCaml's Seq is lazy (a thunk producing + Cons / Nil); ours is just a list, which is adequate for most + baseline programs that don't rely on infinite sequences. API: + empty, cons, return, is_empty, iter, iteri, map, filter, + filter_map, fold_left, length, take, drop, append, to_list, + of_list, init, unfold. unfold takes a step fn `acc -> Option (elt + * acc)` and threads through until it returns None. Lets us write + `Seq.fold_left (+) 0 (Seq.unfold (fun n -> if n > 4 then None else + Some (n, n + 1)) 1)` → 10. - 2026-05-09 Phase 5.1 — unique_set.ml baseline (Set.Make + IntOrd functor app, count uniques in [3;1;4;1;5;9;2;6;5;3;5;8;9;7;9] → 9). First baseline that exercises the functor pipeline end to From 7ca5bfbb70d8ae3d5d8acf1df6f2a7beabf72b40 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 06:05:31 +0000 Subject: [PATCH 117/298] ocaml: phase 5.1 fraction.ml baseline (rational arithmetic, 4/3 -> num+den=7) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Defines: type frac = { num : int; den : int } let rec gcd a b = if b = 0 then a else gcd b (a mod b) let make n d = (* canonicalise: gcd-reduce and force den > 0 *) let add x y = make (x.num * y.den + y.num * x.den) (x.den * y.den) let mul x y = make (x.num * y.num) (x.den * y.den) Test: let r = add (make 1 2) (make 1 3) in (* 5/6 *) let s = mul (make 2 3) (make 3 4) in (* 1/2 *) let t = add r s in (* 5/6 + 1/2 = 4/3 *) t.num + t.den (* = 7 *) Exercises records, recursive gcd, mod, abs, integer division (the truncate-toward-zero semantics from iter 94 are essential here — make would diverge from real OCaml's behaviour with float division). 28 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/fraction.ml | 20 ++++++++++++++++++++ plans/ocaml-on-sx.md | 7 +++++++ 3 files changed, 28 insertions(+) create mode 100644 lib/ocaml/baseline/fraction.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 6f5186e2..b2697a83 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -10,6 +10,7 @@ "exception_handle.ml": 4, "expr_eval.ml": 16, "factorial.ml": 3628800, + "fraction.ml": 7, "frequency.ml": 5, "fizzbuzz.ml": 57, "list_ops.ml": 30, diff --git a/lib/ocaml/baseline/fraction.ml b/lib/ocaml/baseline/fraction.ml new file mode 100644 index 00000000..e67715f0 --- /dev/null +++ b/lib/ocaml/baseline/fraction.ml @@ -0,0 +1,20 @@ +type frac = { num : int; den : int } + +let rec gcd a b = if b = 0 then a else gcd b (a mod b) + +let make n d = + let g = gcd (abs n) (abs d) in + if d < 0 then { num = -n / g; den = -d / g } + else { num = n / g; den = d / g } + +let add x y = + make (x.num * y.den + y.num * x.den) (x.den * y.den) + +let mul x y = make (x.num * y.num) (x.den * y.den) + +;; + +let r = add (make 1 2) (make 1 3) in +let s = mul (make 2 3) (make 3 4) in +let t = add r s in +t.num + t.den diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 212bfbb7..301ede2f 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — fraction.ml baseline (rational arithmetic + via record + gcd canonicalization). Defines `type frac = { num; + den }`, `make` that reduces via gcd and forces den > 0, `add` and + `mul` constructors. Computes (1/2 + 1/3) + (2/3 * 3/4) = 4/3, sums + num + den = 7. Exercises records, recursive gcd, `mod`, `abs`, + integer division, and the new `Int.rem`-style truncate-zero + division semantics from iteration 94. 28 baseline programs total. - 2026-05-09 Phase 6 — Seq module (eager, list-backed) (+4 tests, 576 total). Real OCaml's Seq is lazy (a thunk producing Cons / Nil); ours is just a list, which is adequate for most From ddd1e40d007c1063508125d3f49eee3ef673b3c2 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 06:15:03 +0000 Subject: [PATCH 118/298] ocaml: phase 5.1 bag.ml baseline + String.equal/compare/cat/empty (+3 tests, 579 total) bag.ml: split a sentence on spaces, count each word in a Hashtbl, return the maximum count via Hashtbl.fold. count_words 'the quick brown fox jumps over the lazy dog the fox' -> Hashtbl with 'the' = 3 as the max -> 3 Exercises String.split_on_char + Hashtbl.find_opt/replace + Hashtbl.fold (k v acc -> ...). Together with frequency.ml from iter 84 we now have two Hashtbl-counting baselines exercising slightly different idioms. 29 baseline programs total. String additions: equal a b = a = b compare a b = -1 / 0 / 1 via host < / > cat a b = a ^ b empty = '' (constant) --- lib/ocaml/baseline/bag.ml | 18 ++++++++++++++++++ lib/ocaml/baseline/expected.json | 1 + lib/ocaml/runtime.sx | 5 +++++ lib/ocaml/test.sh | 13 +++++++++++++ plans/ocaml-on-sx.md | 8 ++++++++ 5 files changed, 45 insertions(+) create mode 100644 lib/ocaml/baseline/bag.ml diff --git a/lib/ocaml/baseline/bag.ml b/lib/ocaml/baseline/bag.ml new file mode 100644 index 00000000..661c3523 --- /dev/null +++ b/lib/ocaml/baseline/bag.ml @@ -0,0 +1,18 @@ +let count_words text = + let words = String.split_on_char ' ' text in + let counts = Hashtbl.create 8 in + List.iter (fun w -> + let n = match Hashtbl.find_opt counts w with + | Some n -> n + 1 + | None -> 1 + in + Hashtbl.replace counts w n + ) words; + counts + +let max_count counts = + Hashtbl.fold (fun _ v acc -> if v > acc then v else acc) counts 0 + +;; + +max_count (count_words "the quick brown fox jumps over the lazy dog the fox") diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index b2697a83..b38d6a77 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -1,5 +1,6 @@ { "anagrams.ml": 3, + "bag.ml": 3, "bfs.ml": 6, "btree.ml": 39, "brainfuck.ml": 75, diff --git a/lib/ocaml/runtime.sx b/lib/ocaml/runtime.sx index c5eb84d0..5f21e4f1 100644 --- a/lib/ocaml/runtime.sx +++ b/lib/ocaml/runtime.sx @@ -447,6 +447,11 @@ | h :: t -> aux t (acc ^ h) in aux xs \"\" + let equal a b = a = b + let compare a b = + if a < b then -1 else if a > b then 1 else 0 + let cat a b = a ^ b + let empty = \"\" end ;; module Bytes = struct diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 58868059..9d5d1e24 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1440,6 +1440,14 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 5193) (eval "(ocaml-run \"Seq.fold_left (+) 0 (Seq.unfold (fun n -> if n > 4 then None else Some (n, n + 1)) 1)\")") +;; ── String.equal / compare / cat ───────────────────────────── +(epoch 5200) +(eval "(ocaml-run \"String.equal \\\"abc\\\" \\\"abc\\\"\")") +(epoch 5201) +(eval "(ocaml-run \"String.compare \\\"banana\\\" \\\"apple\\\"\")") +(epoch 5202) +(eval "(ocaml-run \"String.cat \\\"hello \\\" \\\"world\\\"\")") + EPOCHS OUTPUT=$(timeout 360 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -2290,6 +2298,11 @@ check 5191 "Seq filter evens" '5' check 5192 "Seq init 5 i*2" '20' check 5193 "Seq unfold 1..4 sum" '10' +# ── String.equal / compare / cat ──────────────────────────────── +check 5200 "String.equal abc abc" 'true' +check 5201 "String.compare banana apple" '1' +check 5202 "String.cat hello world" '"hello world"' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 301ede2f..cd07e8ac 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,14 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — bag.ml baseline + String.equal/compare/cat/ + empty (+3 tests, 579 total). bag.ml: split a sentence on spaces, + count word frequency in a Hashtbl, return the maximum count. + Sentence "the quick brown fox jumps over the lazy dog the fox" has + "the"×3 as the most frequent → 3. Exercises String.split_on_char + + Hashtbl.find_opt/replace + Hashtbl.fold over (k, v) tuples. 29 + baseline programs total. String additions: equal, compare (via host + `<`/`>`), cat (alias of `^`), empty. - 2026-05-09 Phase 5.1 — fraction.ml baseline (rational arithmetic via record + gcd canonicalization). Defines `type frac = { num; den }`, `make` that reduces via gcd and forces den > 0, `add` and From 4d32c80a99814284620d225f67f9daa0dbf48f88 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 06:26:33 +0000 Subject: [PATCH 119/298] ocaml: phase 6 Bool module + Option.equal/Option.compare (+5 tests, 584 total) Bool module: equal a b = a = b compare a b = 0 if equal, 1 if a, -1 if b (false < true) to_string 'true' / 'false' of_string s = s = 'true' not_ wraps host not to_int true=1, false=0 Option additions (take eq/cmp parameter for the inner value): equal eq a b None=None, otherwise eq the inner values compare cmp a b None < Some _; otherwise cmp inner Option.equal (=) (Some 1) (Some 1) = true Option.equal (=) (Some 1) None = false Option.compare compare (Some 5) (Some 3) = 1 --- lib/ocaml/runtime.sx | 24 ++++++++++++++++++++++++ lib/ocaml/test.sh | 19 +++++++++++++++++++ plans/ocaml-on-sx.md | 6 ++++++ 3 files changed, 49 insertions(+) diff --git a/lib/ocaml/runtime.sx b/lib/ocaml/runtime.sx index 5f21e4f1..a9e204fb 100644 --- a/lib/ocaml/runtime.sx +++ b/lib/ocaml/runtime.sx @@ -321,6 +321,18 @@ | None -> Error none_v | Some x -> Ok x + let equal eq a b = + match a with + | None -> (match b with None -> true | Some _ -> false) + | Some x -> + (match b with None -> false | Some y -> eq x y) + + let compare cmp a b = + match a with + | None -> (match b with None -> 0 | Some _ -> -1) + | Some x -> + (match b with None -> 1 | Some y -> cmp x y) + let some x = Some x let none = None end ;; @@ -464,6 +476,18 @@ let sub s i n = String.sub s i n end ;; + module Bool = struct + let equal a b = a = b + let compare a b = + if a = b then 0 + else if a then 1 + else -1 + let to_string b = if b then \"true\" else \"false\" + let of_string s = s = \"true\" + let not_ b = not b + let to_int b = if b then 1 else 0 + end ;; + module Char = struct let code c = _char_code c let chr n = _char_chr n diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 9d5d1e24..7d040fe7 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1448,6 +1448,18 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 5202) (eval "(ocaml-run \"String.cat \\\"hello \\\" \\\"world\\\"\")") +;; ── Bool module + Option.equal/compare ─────────────────────── +(epoch 5210) +(eval "(ocaml-run \"Bool.to_string true ^ \\\"-\\\" ^ Bool.to_string false\")") +(epoch 5211) +(eval "(ocaml-run \"Bool.compare true false\")") +(epoch 5212) +(eval "(ocaml-run \"Option.equal (=) (Some 1) (Some 1)\")") +(epoch 5213) +(eval "(ocaml-run \"Option.equal (=) (Some 1) None\")") +(epoch 5214) +(eval "(ocaml-run \"Option.compare compare (Some 5) (Some 3)\")") + EPOCHS OUTPUT=$(timeout 360 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -2303,6 +2315,13 @@ check 5200 "String.equal abc abc" 'true' check 5201 "String.compare banana apple" '1' check 5202 "String.cat hello world" '"hello world"' +# ── Bool module + Option.equal/compare ───────────────────────── +check 5210 "Bool.to_string true/false" '"true-false"' +check 5211 "Bool.compare true false" '1' +check 5212 "Option.equal Some 1 Some 1" 'true' +check 5213 "Option.equal Some 1 None" 'false' +check 5214 "Option.compare Some 5 Some 3" '1' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index cd07e8ac..dc5c66fc 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,12 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 6 — Bool module + Option.equal / Option.compare + (+5 tests, 584 total). Bool: equal, compare (false < true via if + ladder), to_string, of_string, not_, to_int. Option additions + take an `eq` or `cmp` parameter for the inner-value check, mirroring + real OCaml's signature: `Option.equal eq a b`, `Option.compare cmp + a b`. None < Some _ for compare. - 2026-05-09 Phase 5.1 — bag.ml baseline + String.equal/compare/cat/ empty (+3 tests, 579 total). bag.ml: split a sentence on spaces, count word frequency in a Hashtbl, return the maximum count. From 98ba772acd24a9b06446da6bb9df57678dd31da3 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 06:35:42 +0000 Subject: [PATCH 120/298] ocaml: phase 6 List.equal / List.compare (+5 tests, 589 total) Both take an inner predicate / comparator and walk both lists in lockstep: equal eq a b short-circuits on first mismatch compare cmp a b -1 if a is a strict prefix 1 if b is 0 if both empty otherwise first non-zero element comparison Mirrors real OCaml's signatures. List.equal (=) [1;2;3] [1;2;3] = true List.equal (=) [1;2;3] [1;2;4] = false List.compare compare [1;2;3] [1;2;4] = -1 List.compare compare [1;2] [1;2;3] = -1 List.compare compare [] [] = 0 --- lib/ocaml/runtime.sx | 18 ++++++++++++++++++ lib/ocaml/test.sh | 19 +++++++++++++++++++ plans/ocaml-on-sx.md | 7 +++++++ 3 files changed, 44 insertions(+) diff --git a/lib/ocaml/runtime.sx b/lib/ocaml/runtime.sx index a9e204fb..b9ac30d9 100644 --- a/lib/ocaml/runtime.sx +++ b/lib/ocaml/runtime.sx @@ -194,6 +194,24 @@ | Some v -> Some v | None -> find_map f t + let rec equal eq a b = + match a with + | [] -> (match b with [] -> true | _ -> false) + | h :: t -> + (match b with + | [] -> false + | h2 :: t2 -> if eq h h2 then equal eq t t2 else false) + + let rec compare cmp a b = + match a with + | [] -> (match b with [] -> 0 | _ -> -1) + | h :: t -> + (match b with + | [] -> 1 + | h2 :: t2 -> + let c = cmp h h2 in + if c <> 0 then c else compare cmp t t2) + let rec combine xs ys = match xs with | [] -> (match ys with diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 7d040fe7..76587eac 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1460,6 +1460,18 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 5214) (eval "(ocaml-run \"Option.compare compare (Some 5) (Some 3)\")") +;; ── List.equal / List.compare ──────────────────────────────── +(epoch 5220) +(eval "(ocaml-run \"List.equal (=) [1;2;3] [1;2;3]\")") +(epoch 5221) +(eval "(ocaml-run \"List.equal (=) [1;2;3] [1;2;4]\")") +(epoch 5222) +(eval "(ocaml-run \"List.compare compare [1;2;3] [1;2;4]\")") +(epoch 5223) +(eval "(ocaml-run \"List.compare compare [1;2] [1;2;3]\")") +(epoch 5224) +(eval "(ocaml-run \"List.compare compare [] []\")") + EPOCHS OUTPUT=$(timeout 360 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -2322,6 +2334,13 @@ check 5212 "Option.equal Some 1 Some 1" 'true' check 5213 "Option.equal Some 1 None" 'false' check 5214 "Option.compare Some 5 Some 3" '1' +# ── List.equal / List.compare ─────────────────────────────────── +check 5220 "List.equal [1;2;3] [1;2;3]" 'true' +check 5221 "List.equal [1;2;3] [1;2;4]" 'false' +check 5222 "List.compare [1;2;3] [1;2;4]" '-1' +check 5223 "List.compare [1;2] [1;2;3]" '-1' +check 5224 "List.compare [] []" '0' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index dc5c66fc..0bd9a913 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 6 — List.equal / List.compare (+5 tests, 589 + total). Both take an inner predicate / comparator and walk both + lists in lockstep. equal short-circuits on first mismatch. + compare returns -1 if a is a strict prefix, 1 if b is, 0 if both + empty, otherwise the first non-zero element comparison. Mirrors + real OCaml's signatures: `List.equal eq a b`, `List.compare cmp + a b`. - 2026-05-09 Phase 6 — Bool module + Option.equal / Option.compare (+5 tests, 584 total). Bool: equal, compare (false < true via if ladder), to_string, of_string, not_, to_int. Option additions From 0c3b5d21faa5fa26a756899336bba2018329841e Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 06:47:47 +0000 Subject: [PATCH 121/298] ocaml: phase 5.1 safe_div.ml baseline + Result.equal/compare/iter_error (+3 tests, 592 total) safe_div.ml: integer division returning Result. Sum-safe folds pairs, skipping the Error branches. [(10,2); (20,4); (30,0); (50,5)] -> 5 + 5 + 0 + 10 = 20 Result module additions (mirroring real OCaml's signatures): equal eq_ok eq_err a b compare cmp_ok cmp_err a b Ok < Error (i.e. Ok x compared to Error e returns -1) iter_error f r Result.equal (=) (=) (Ok 1) (Ok 1) = true Result.compare compare compare (Ok 5) (Ok 3) = 1 Result.compare compare compare (Ok 1) (Error _) = -1 30 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/safe_div.ml | 14 ++++++++++++++ lib/ocaml/runtime.sx | 19 +++++++++++++++++++ lib/ocaml/test.sh | 13 +++++++++++++ plans/ocaml-on-sx.md | 7 +++++++ 5 files changed, 54 insertions(+) create mode 100644 lib/ocaml/baseline/safe_div.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index b38d6a77..7a081af7 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -26,6 +26,7 @@ "queens.ml": 2, "quicksort.ml": 44, "roman.ml": 44, + "safe_div.ml": 20, "sieve.ml": 15, "sum_squares.ml": 385, "unique_set.ml": 9, diff --git a/lib/ocaml/baseline/safe_div.ml b/lib/ocaml/baseline/safe_div.ml new file mode 100644 index 00000000..93e16e29 --- /dev/null +++ b/lib/ocaml/baseline/safe_div.ml @@ -0,0 +1,14 @@ +let safe_div a b = + if b = 0 then Error "division by zero" + else Ok (a / b) + +let sum_safe pairs = + List.fold_left (fun acc (a, b) -> + match safe_div a b with + | Ok q -> acc + q + | Error _ -> acc + ) 0 pairs + +;; + +sum_safe [(10, 2); (20, 4); (30, 0); (50, 5)] diff --git a/lib/ocaml/runtime.sx b/lib/ocaml/runtime.sx index b9ac30d9..132d7586 100644 --- a/lib/ocaml/runtime.sx +++ b/lib/ocaml/runtime.sx @@ -410,6 +410,25 @@ match r with | Ok x -> ok_f x | Error e -> err_f e + + let equal eq_ok eq_err a b = + match a with + | Ok x -> + (match b with Ok y -> eq_ok x y | Error _ -> false) + | Error e -> + (match b with Ok _ -> false | Error e2 -> eq_err e e2) + + let compare cmp_ok cmp_err a b = + match a with + | Ok x -> + (match b with Ok y -> cmp_ok x y | Error _ -> -1) + | Error e -> + (match b with Ok _ -> 1 | Error e2 -> cmp_err e e2) + + let iter_error f r = + match r with + | Ok _ -> () + | Error e -> f e end ;; module String = struct diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 76587eac..1a2b5041 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1472,6 +1472,14 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 5224) (eval "(ocaml-run \"List.compare compare [] []\")") +;; ── Result.equal / Result.compare ──────────────────────────── +(epoch 5230) +(eval "(ocaml-run \"Result.equal (=) (=) (Ok 1) (Ok 1)\")") +(epoch 5231) +(eval "(ocaml-run \"Result.compare compare compare (Ok 5) (Ok 3)\")") +(epoch 5232) +(eval "(ocaml-run \"Result.compare compare compare (Ok 1) (Error \\\"fail\\\")\")") + EPOCHS OUTPUT=$(timeout 360 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -2341,6 +2349,11 @@ check 5222 "List.compare [1;2;3] [1;2;4]" '-1' check 5223 "List.compare [1;2] [1;2;3]" '-1' check 5224 "List.compare [] []" '0' +# ── Result.equal / Result.compare ─────────────────────────────── +check 5230 "Result.equal Ok 1 Ok 1" 'true' +check 5231 "Result.compare Ok 5 Ok 3" '1' +check 5232 "Result.compare Ok < Error" '-1' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 0bd9a913..3b0ad6db 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — safe_div.ml baseline + Result.equal / + compare / iter_error (+3 tests, 592 total). safe_div divides only + if divisor non-zero, returns `Error "..."` otherwise. sum_safe folds + pairs with `Ok q -> acc+q | Error _ -> acc`. + `[(10,2);(20,4);(30,0);(50,5)]` → 5+5+0+10 = 20. Result additions: + equal/compare take separate eq/cmp for Ok and Error sides; Ok < Error + (-1) and Error > Ok (1). 30 baseline programs total. - 2026-05-09 Phase 6 — List.equal / List.compare (+5 tests, 589 total). Both take an inner predicate / comparator and walk both lists in lockstep. equal short-circuits on first mismatch. From 97a8c06690fa786b6ac3911ed46a40aeb42bd9b2 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 06:59:22 +0000 Subject: [PATCH 122/298] ocaml: phase 5.1 balance.ml baseline (paren/bracket/brace balance via Stack) is_balanced walks a string; on each char: '(', '[', '{' -> Stack.push c ')', ']', '}' -> require stack non-empty AND top = expected opener, else mark ok = false others -> skip At end: !ok && Stack.is_empty stack. Five test cases: '({[abc]d}e)' -> true '(a]' -> false (no matching opener) '{[}]' -> false (mismatched closer) '(())' -> true '' -> true Sum of (if balanced then 1 else 0) -> 3. Exercises: Stack.create / push / pop / is_empty s.[!i] string indexing while loop + bool ref short-circuit multi-arm if/else if/else if dispatch 31 baseline programs total. --- lib/ocaml/baseline/balance.ml | 25 +++++++++++++++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 7 +++++++ 3 files changed, 33 insertions(+) create mode 100644 lib/ocaml/baseline/balance.ml diff --git a/lib/ocaml/baseline/balance.ml b/lib/ocaml/baseline/balance.ml new file mode 100644 index 00000000..5afcafa7 --- /dev/null +++ b/lib/ocaml/baseline/balance.ml @@ -0,0 +1,25 @@ +let is_balanced s = + let stack = Stack.create () in + let n = String.length s in + let ok = ref true in + let i = ref 0 in + while !i < n && !ok do + let c = s.[!i] in + (if c = '(' || c = '[' || c = '{' then Stack.push c stack + else if c = ')' then + (if Stack.is_empty stack || Stack.pop stack <> '(' then ok := false) + else if c = ']' then + (if Stack.is_empty stack || Stack.pop stack <> '[' then ok := false) + else if c = '}' then + (if Stack.is_empty stack || Stack.pop stack <> '{' then ok := false)); + i := !i + 1 + done; + !ok && Stack.is_empty stack + +;; + +(if is_balanced "({[abc]d}e)" then 1 else 0) + +(if is_balanced "(a]" then 1 else 0) + +(if is_balanced "{[}]" then 1 else 0) + +(if is_balanced "(())" then 1 else 0) + +(if is_balanced "" then 1 else 0) diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 7a081af7..47733ffd 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -1,6 +1,7 @@ { "anagrams.ml": 3, "bag.ml": 3, + "balance.ml": 3, "bfs.ml": 6, "btree.ml": 39, "brainfuck.ml": 75, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 3b0ad6db..262543f8 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — balance.ml baseline (paren/bracket/brace + balance using Stack). is_balanced walks a string; on opener push, + on closer check stack non-empty + top matches expected opener (else + fail). Returns ok && is_empty stack at end. 5 test cases: + "({[abc]d}e)" ✓, "(a]" ✗, "{[}]" ✗ (mismatched closers), "(())" ✓, + "" ✓ → 3 balanced. Exercises Stack.create / push / pop / is_empty / + s.[!i] / while + bool ref short-circuit. 31 baseline programs total. - 2026-05-09 Phase 5.1 — safe_div.ml baseline + Result.equal / compare / iter_error (+3 tests, 592 total). safe_div divides only if divisor non-zero, returns `Error "..."` otherwise. sum_safe folds From e42ff3b1f6d512057c0cc9f4c0f4641364c6b795 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 07:09:29 +0000 Subject: [PATCH 123/298] ocaml: phase 6 Float module fleshed out (+6 tests, 598 total) New Float members: zero / one / minus_one abs / neg add / sub / mul / div (lift host '+.' '-.' '*.' '/.') max / min (if-based) equal / compare (Float.compare returns -1 / 0 / 1) to_int (host floor) of_int (identity in dynamic runtime) of_string (delegates to _int_of_string) Aligns Float with Int's API and lets baselines use Float.add / Float.compare / etc without lifting the symbols themselves. Float.add 3.5 4.5 = 8 Float.compare 2.5 5.0 = -1 Float.abs -3.7 = 3.7 Float.max 3.14 2.71 = 3.14 --- lib/ocaml/runtime.sx | 17 +++++++++++++++++ lib/ocaml/test.sh | 22 ++++++++++++++++++++++ plans/ocaml-on-sx.md | 6 ++++++ 3 files changed, 45 insertions(+) diff --git a/lib/ocaml/runtime.sx b/lib/ocaml/runtime.sx index 132d7586..6f233907 100644 --- a/lib/ocaml/runtime.sx +++ b/lib/ocaml/runtime.sx @@ -630,6 +630,7 @@ module Float = struct let to_string f = _string_of_float f + let of_string s = _int_of_string s let sqrt f = _float_sqrt f let sin f = _float_sin f let cos f = _float_cos f @@ -638,6 +639,22 @@ let ceil f = _float_ceil f let round f = _float_round f let pi = 3.141592653589793 + let zero = 0.0 + let one = 1.0 + let minus_one = -1.0 + let abs f = if f < 0.0 then 0.0 -. f else f + let neg f = 0.0 -. f + let add a b = a +. b + let sub a b = a -. b + let mul a b = a *. b + let div a b = a /. b + let max a b = if a > b then a else b + let min a b = if a < b then a else b + let equal a b = a = b + let compare a b = + if a < b then -1 else if a > b then 1 else 0 + let to_int f = _float_floor f + let of_int n = n end ;; module Printf = struct diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 1a2b5041..02fcfcc5 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1480,6 +1480,20 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 5232) (eval "(ocaml-run \"Result.compare compare compare (Ok 1) (Error \\\"fail\\\")\")") +;; ── Float module additions ─────────────────────────────────── +(epoch 5240) +(eval "(ocaml-run \"Float.add 3.5 4.5\")") +(epoch 5241) +(eval "(ocaml-run \"Float.compare 2.5 5.0\")") +(epoch 5242) +(eval "(ocaml-run \"Float.abs (-3.7)\")") +(epoch 5243) +(eval "(ocaml-run \"Float.max 3.14 2.71\")") +(epoch 5244) +(eval "(ocaml-run \"Float.equal 1.5 1.5\")") +(epoch 5245) +(eval "(ocaml-run \"Float.zero +. Float.one\")") + EPOCHS OUTPUT=$(timeout 360 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -2354,6 +2368,14 @@ check 5230 "Result.equal Ok 1 Ok 1" 'true' check 5231 "Result.compare Ok 5 Ok 3" '1' check 5232 "Result.compare Ok < Error" '-1' +# ── Float module additions ────────────────────────────────────── +check 5240 "Float.add 3.5 4.5" '8' +check 5241 "Float.compare 2.5 5.0" '-1' +check 5242 "Float.abs -3.7" '3.7' +check 5243 "Float.max 3.14 2.71" '3.14' +check 5244 "Float.equal 1.5 1.5" 'true' +check 5245 "Float.zero +. Float.one" '1' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 262543f8..da8230e0 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,12 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 6 — Float module fleshed out (+6 tests, 598 + total). New Float members: zero, one, minus_one, abs, neg, add, + sub, mul, div, max, min, equal, compare, to_int, of_int, + of_string. Most just lift the host operators (`+.` is already + available as a global). Aligns Float with Int module's API and + unblocks idiomatic float arithmetic in baselines. - 2026-05-09 Phase 5.1 — balance.ml baseline (paren/bracket/brace balance using Stack). is_balanced walks a string; on opener push, on closer check stack non-empty + top matches expected opener (else From 90418c120b7c4c01de2bb1d6af4e4b2e00ad7511 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 07:19:52 +0000 Subject: [PATCH 124/298] ocaml: phase 5.1 pi_leibniz.ml baseline + int_of_float fix (1000 terms x 100 = 314) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pi_leibniz.ml: Leibniz formula for pi. pi/4 = 1 - 1/3 + 1/5 - 1/7 + ... pi ~= 4 * sum_{k=0}^{n-1} (-1)^k / (2k+1) For n=1000, pi ~= 3.140593. Multiply by 100 and int_of_float -> 314. Side-quest: int_of_float was wrongly defined as identity in iteration 94. Fixed to: let int_of_float f = if f < 0.0 then _float_ceil f else _float_floor f (truncate toward zero, mirroring real OCaml's int_of_float). The identity definition was a stub from when integer/float dispatch was not yet split — now they're separate, the stub is wrong. Float.to_int still uses floor since OCaml's docs say the result is unspecified for nan / out-of-range; close enough for our scope. 32 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/pi_leibniz.ml | 10 ++++++++++ lib/ocaml/runtime.sx | 3 ++- plans/ocaml-on-sx.md | 8 ++++++++ 4 files changed, 21 insertions(+), 1 deletion(-) create mode 100644 lib/ocaml/baseline/pi_leibniz.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 47733ffd..9e36ca4a 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -23,6 +23,7 @@ "module_use.ml": 3, "mutable_record.ml": 10, "option_match.ml": 5, + "pi_leibniz.ml": 314, "poly_stack.ml": 5, "queens.ml": 2, "quicksort.ml": 44, diff --git a/lib/ocaml/baseline/pi_leibniz.ml b/lib/ocaml/baseline/pi_leibniz.ml new file mode 100644 index 00000000..343acfe0 --- /dev/null +++ b/lib/ocaml/baseline/pi_leibniz.ml @@ -0,0 +1,10 @@ +let pi_approx n = + let total = ref 0.0 in + for k = 0 to n - 1 do + let sign = if k mod 2 = 0 then 1.0 else -1.0 in + total := !total +. sign /. float_of_int (2 * k + 1) + done; + 4.0 *. !total +;; + +int_of_float (pi_approx 1000 *. 100.0) diff --git a/lib/ocaml/runtime.sx b/lib/ocaml/runtime.sx index 6f233907..d734a29d 100644 --- a/lib/ocaml/runtime.sx +++ b/lib/ocaml/runtime.sx @@ -1180,7 +1180,8 @@ let min_int = -4611686018427387904 let abs_float f = if f < 0.0 then 0.0 -. f else f let float_of_int n = n - let int_of_float f = f + let int_of_float f = + if f < 0.0 then _float_ceil f else _float_floor f ") (define ocaml-stdlib-loaded false) diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index da8230e0..5c8df858 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,14 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — pi_leibniz.ml baseline (Leibniz formula, + 1000 terms × 100 → 314). Side-quest: `int_of_float` was wrong — + defined as identity in iteration 94 instead of truncation. Fixed + to `if f < 0.0 then ceil else floor` (truncate toward zero, real + OCaml semantics). Float.to_int still uses floor since OCaml's + documentation says "result is unspecified if the argument is nan + or falls outside the int range" — close enough for our scope. 32 + baseline programs total. - 2026-05-09 Phase 6 — Float module fleshed out (+6 tests, 598 total). New Float members: zero, one, minus_one, abs, neg, add, sub, mul, div, max, min, equal, compare, to_int, of_int, From 75a1adbbd5b074740a880daa8664f521c46ef7d2 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 07:31:33 +0000 Subject: [PATCH 125/298] ocaml: phase 5.1 shuffle.ml baseline (Fisher-Yates with deterministic Random) In-place Fisher-Yates shuffle using: Random.init 42 deterministic seed let a = Array.of_list xs for i = n - 1 downto 1 do reverse iteration let j = Random.int (i + 1) let tmp = a.(i) in a.(i) <- a.(j); a.(j) <- tmp done Sum is invariant under permutation, so the test value (55 for [1..10] = 1+2+...+10) verifies the shuffle is a valid permutation regardless of which permutation the seed yields. Exercises Random.init / Random.int + Array.of_list / to_list / length / arr.(i) / arr.(i) <- v + downto loop + multi-statement sequencing within for-body. 33 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/shuffle.ml | 15 +++++++++++++++ plans/ocaml-on-sx.md | 7 +++++++ 3 files changed, 23 insertions(+) create mode 100644 lib/ocaml/baseline/shuffle.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 9e36ca4a..b076242f 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -29,6 +29,7 @@ "quicksort.ml": 44, "roman.ml": 44, "safe_div.ml": 20, + "shuffle.ml": 55, "sieve.ml": 15, "sum_squares.ml": 385, "unique_set.ml": 9, diff --git a/lib/ocaml/baseline/shuffle.ml b/lib/ocaml/baseline/shuffle.ml new file mode 100644 index 00000000..2ea776d8 --- /dev/null +++ b/lib/ocaml/baseline/shuffle.ml @@ -0,0 +1,15 @@ +let shuffle xs = + Random.init 42; + let a = Array.of_list xs in + let n = Array.length a in + for i = n - 1 downto 1 do + let j = Random.int (i + 1) in + let tmp = a.(i) in + a.(i) <- a.(j); + a.(j) <- tmp + done; + Array.to_list a + +;; + +List.fold_left (+) 0 (shuffle [1;2;3;4;5;6;7;8;9;10]) diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 5c8df858..7ac4126c 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — shuffle.ml baseline (Fisher-Yates with + deterministic Random.init seed). In-place swap loop using `for i = + n - 1 downto 1` and `a.(i) <- a.(j)`. Sum is invariant under + permutation, so the test value (55 for [1..10]) verifies that the + shuffle is a valid permutation regardless of which one. Exercises + Random.init / Random.int + Array.of_list / to_list / length / + arr.(i) / arr.(i) <- v + downto loop. 33 baseline programs total. - 2026-05-09 Phase 5.1 — pi_leibniz.ml baseline (Leibniz formula, 1000 terms × 100 → 314). Side-quest: `int_of_float` was wrong — defined as identity in iteration 94 instead of truncation. Fixed From 9a8bbff5b264658459201e67ed9c62d45d133d27 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 07:41:01 +0000 Subject: [PATCH 126/298] ocaml: phase 5.1 json_pretty.ml baseline (recursive ADT to string, len = 24) Defines a JSON-like algebraic data type: type json = | JNull | JBool of bool | JInt of int | JStr of string | JList of json list Recursively serialises to a string via match-on-constructor, then measures the length: JList [JInt 1; JBool true; JNull; JStr 'hi'; JList [JInt 2; JInt 3]] -> '[1,true,null,"hi",[2,3]]' length 24 Exercises: - five-constructor ADT (one nullary, three single-arg, one list-arg) - recursive match - String.concat ',' (List.map to_string xs) - string-cat with embedded escaped quotes 34 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/json_pretty.ml | 20 ++++++++++++++++++++ plans/ocaml-on-sx.md | 8 ++++++++ 3 files changed, 29 insertions(+) create mode 100644 lib/ocaml/baseline/json_pretty.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index b076242f..1b08ac96 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -16,6 +16,7 @@ "frequency.ml": 5, "fizzbuzz.ml": 57, "list_ops.ml": 30, + "json_pretty.ml": 24, "lambda_calc.ml": 7, "levenshtein.ml": 11, "memo_fib.ml": 75025, diff --git a/lib/ocaml/baseline/json_pretty.ml b/lib/ocaml/baseline/json_pretty.ml new file mode 100644 index 00000000..861866c8 --- /dev/null +++ b/lib/ocaml/baseline/json_pretty.ml @@ -0,0 +1,20 @@ +type json = + | JNull + | JBool of bool + | JInt of int + | JStr of string + | JList of json list + +let rec to_string j = + match j with + | JNull -> "null" + | JBool b -> if b then "true" else "false" + | JInt n -> string_of_int n + | JStr s -> "\"" ^ s ^ "\"" + | JList xs -> + "[" ^ String.concat "," (List.map to_string xs) ^ "]" + +;; + +let j = JList [JInt 1; JBool true; JNull; JStr "hi"; JList [JInt 2; JInt 3]] in +String.length (to_string j) diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 7ac4126c..840fbdc9 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,14 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — json_pretty.ml baseline (recursive ADT + serialization). Defines a JSON-like ADT (JNull / JBool / JInt / + JStr / JList) and recursively pretty-prints to a string, then + measures length. Tests algebraic data types with five constructors + (one nullary, three single-arg, one list-arg), recursive `match` + with five arms, `String.concat "," (List.map ...)`, and string + concatenation. `[1,true,null,"hi",[2,3]]` → 24 chars. 34 baseline + programs total. - 2026-05-09 Phase 5.1 — shuffle.ml baseline (Fisher-Yates with deterministic Random.init seed). In-place swap loop using `for i = n - 1 downto 1` and `a.(i) <- a.(j)`. Sum is invariant under From c272b1ea042b6719051e966b71035bd910749f31 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 07:50:24 +0000 Subject: [PATCH 127/298] ocaml: phase 6 Either module + Hashtbl.copy (+4 tests, 602 total) Either module (mirrors OCaml 4.12+ stdlib): left x / right x is_left / is_right find_left / find_right (return Option) map_left / map_right (single-side mappers) fold lf rf e (case dispatch) equal eq_l eq_r a b compare cmp_l cmp_r a b (Left < Right) Constructors are bare 'Left x' / 'Right x' (OCaml 4.12+ exposes them directly without an explicit type-decl). Hashtbl.copy: build a fresh cell with _hashtbl_create walk _hashtbl_to_list and re-add each (k, v) mutating one copy doesn't touch the other (Hashtbl.length t + Hashtbl.length t2 = 3 after fork-and-add verifies that adds to t2 don't appear in t) --- lib/ocaml/runtime.sx | 42 ++++++++++++++++++++++++++++++++++++++++++ lib/ocaml/test.sh | 16 ++++++++++++++++ plans/ocaml-on-sx.md | 7 +++++++ 3 files changed, 65 insertions(+) diff --git a/lib/ocaml/runtime.sx b/lib/ocaml/runtime.sx index d734a29d..e39a43b3 100644 --- a/lib/ocaml/runtime.sx +++ b/lib/ocaml/runtime.sx @@ -1030,6 +1030,48 @@ let remove t k = _hashtbl_remove t k let reset t = _hashtbl_clear t let clear t = _hashtbl_clear t + + let copy t = + let t' = _hashtbl_create 8 in + List.iter + (fun (k, v) -> _hashtbl_add t' k v) + (_hashtbl_to_list t); + t' + end ;; + + module Either = struct + let left x = Left x + let right x = Right x + + let is_left e = match e with Left _ -> true | Right _ -> false + let is_right e = match e with Left _ -> false | Right _ -> true + + let find_left e = + match e with Left x -> Some x | Right _ -> None + let find_right e = + match e with Left _ -> None | Right x -> Some x + + let map_left f e = + match e with Left x -> Left (f x) | Right x -> Right x + let map_right f e = + match e with Left x -> Left x | Right x -> Right (f x) + + let fold lf rf e = + match e with Left x -> lf x | Right x -> rf x + + let equal eq_l eq_r a b = + match a with + | Left x -> + (match b with Left y -> eq_l x y | Right _ -> false) + | Right x -> + (match b with Left _ -> false | Right y -> eq_r x y) + + let compare cmp_l cmp_r a b = + match a with + | Left x -> + (match b with Left y -> cmp_l x y | Right _ -> -1) + | Right x -> + (match b with Left _ -> 1 | Right y -> cmp_r x y) end ;; module Map = struct diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 02fcfcc5..5bd5989a 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1494,6 +1494,16 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 5245) (eval "(ocaml-run \"Float.zero +. Float.one\")") +;; ── Either module + Hashtbl.copy ───────────────────────────── +(epoch 5250) +(eval "(ocaml-run \"Either.is_left (Left 5)\")") +(epoch 5251) +(eval "(ocaml-run \"Either.fold (fun x -> x + 100) (fun x -> x * 10) (Left 7)\")") +(epoch 5252) +(eval "(ocaml-run \"Either.fold (fun x -> x + 100) (fun x -> x * 10) (Right 7)\")") +(epoch 5253) +(eval "(ocaml-run \"let t = Hashtbl.create 4 in Hashtbl.add t \\\"a\\\" 1; let t2 = Hashtbl.copy t in Hashtbl.add t2 \\\"b\\\" 2; Hashtbl.length t + Hashtbl.length t2\")") + EPOCHS OUTPUT=$(timeout 360 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -2376,6 +2386,12 @@ check 5243 "Float.max 3.14 2.71" '3.14' check 5244 "Float.equal 1.5 1.5" 'true' check 5245 "Float.zero +. Float.one" '1' +# ── Either module + Hashtbl.copy ──────────────────────────────── +check 5250 "Either.is_left Left" 'true' +check 5251 "Either.fold Left 7+100" '107' +check 5252 "Either.fold Right 7*10" '70' +check 5253 "Hashtbl.copy independent" '3' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 840fbdc9..51911fb1 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 6 — Either module + Hashtbl.copy (+4 tests, 602 + total). Either: left, right, is_left, is_right, find_left, + find_right, map_left, map_right, fold, equal, compare. Constructors + are bare `Left x` / `Right x` (per OCaml 4.12+). Hashtbl.copy + builds a fresh cell, walks `_hashtbl_to_list`, and re-adds; mutating + one copy doesn't touch the other (verified by `Hashtbl.length t + + Hashtbl.length t2 = 3` after a fork-and-add). - 2026-05-09 Phase 5.1 — json_pretty.ml baseline (recursive ADT serialization). Defines a JSON-like ADT (JNull / JBool / JInt / JStr / JList) and recursively pretty-prints to a string, then From 5c70747ac715b1629054aeb886d0901c2ce07c71 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 08:01:21 +0000 Subject: [PATCH 128/298] ocaml: phase 5.1 word_freq.ml baseline (Map.Make on String, distinct = 8) First baseline using Map.Make on a string-keyed map: module StringOrd = struct type t = string let compare = String.compare end module SMap = Map.Make (StringOrd) let count_words text = let words = String.split_on_char ' ' text in List.fold_left (fun m w -> let n = match SMap.find_opt w m with | Some n -> n | None -> 0 in SMap.add w (n + 1) m ) SMap.empty words For 'the quick brown fox jumps over the lazy dog' ('the' appears twice), SMap.cardinal -> 8. Complements bag.ml (Hashtbl-based) and unique_set.ml (Set.Make) with a sorted Map view of the same kind of counting problem. 35 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/word_freq.ml | 21 +++++++++++++++++++++ plans/ocaml-on-sx.md | 7 +++++++ 3 files changed, 29 insertions(+) create mode 100644 lib/ocaml/baseline/word_freq.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 1b08ac96..3453f55b 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -31,6 +31,7 @@ "roman.ml": 44, "safe_div.ml": 20, "shuffle.ml": 55, + "word_freq.ml": 8, "sieve.ml": 15, "sum_squares.ml": 385, "unique_set.ml": 9, diff --git a/lib/ocaml/baseline/word_freq.ml b/lib/ocaml/baseline/word_freq.ml new file mode 100644 index 00000000..13dcac56 --- /dev/null +++ b/lib/ocaml/baseline/word_freq.ml @@ -0,0 +1,21 @@ +module StringOrd = struct + type t = string + let compare = String.compare +end + +module SMap = Map.Make (StringOrd) + +let count_words text = + let words = String.split_on_char ' ' text in + List.fold_left (fun m w -> + let n = match SMap.find_opt w m with + | Some n -> n + | None -> 0 + in + SMap.add w (n + 1) m + ) SMap.empty words + +;; + +let m = count_words "the quick brown fox jumps over the lazy dog" in +SMap.cardinal m diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 51911fb1..4f7654f3 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — word_freq.ml baseline (Map.Make on String, + count distinct words → 8). Defines a StringOrd module + applies + Map.Make to it. Folds the input through SMap.find_opt + SMap.add to + count each word, then reports SMap.cardinal. "the quick brown fox + jumps over the lazy dog" — "the" appears twice, so 8 distinct + words. First baseline using Map.Make on a string-keyed map. 35 + baseline programs total. - 2026-05-09 Phase 6 — Either module + Hashtbl.copy (+4 tests, 602 total). Either: left, right, is_left, is_right, find_left, find_right, map_left, map_right, fold, equal, compare. Constructors From 1a828d5b9fb60fd09050332c5e65981cac69f21f Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 08:11:07 +0000 Subject: [PATCH 129/298] ocaml: phase 5.1 validate.ml baseline (Either-based validation, 3 errs * 100 + 117 = 417) validate_int returns Left msg on empty / non-digit, Right (int_of_string s) on a digit-only string. process folds inputs with a tuple accumulator (errs, sum), branching on the result. ['12'; 'abc'; '5'; ''; '100'; 'x'] -> 3 errors (abc, '', x), valid sum = 12+5+100 = 117 -> errs * 100 + sum = 417 Exercises: - Either constructors used bare (Left/Right without 'Either.' qualification) - char range comparison: c >= '0' && c <= '9' - tuple-pattern destructuring on let-binding (iter 98) - recursive helper defined inside if-else - List.fold_left with tuple accumulator 36 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/validate.ml | 24 ++++++++++++++++++++++++ plans/ocaml-on-sx.md | 9 +++++++++ 3 files changed, 34 insertions(+) create mode 100644 lib/ocaml/baseline/validate.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 3453f55b..367541d2 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -35,5 +35,6 @@ "sieve.ml": 15, "sum_squares.ml": 385, "unique_set.ml": 9, + "validate.ml": 417, "word_count.ml": 3 } diff --git a/lib/ocaml/baseline/validate.ml b/lib/ocaml/baseline/validate.ml new file mode 100644 index 00000000..5065bbcb --- /dev/null +++ b/lib/ocaml/baseline/validate.ml @@ -0,0 +1,24 @@ +let validate_int s = + if String.length s = 0 then Left "empty" + else + let rec all_digits i = + if i >= String.length s then true + else + let c = s.[i] in + if c >= '0' && c <= '9' then all_digits (i + 1) + else false + in + if all_digits 0 then Right (int_of_string s) + else Left ("not a number: " ^ s) + +let process inputs = + List.fold_left (fun (errs, vals) s -> + match validate_int s with + | Left _ -> (errs + 1, vals) + | Right v -> (errs, vals + v) + ) (0, 0) inputs + +;; + +let (errs, sum) = process ["12"; "abc"; "5"; ""; "100"; "x"] in +errs * 100 + sum diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 4f7654f3..7ede6105 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,15 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — validate.ml baseline (Either-based input + validation, 3 errors × 100 + 117 sum = 417). validate_int returns + `Left msg` on empty / non-digit, `Right (int_of_string s)` on a + digit-only string. process folds inputs with a tuple accumulator + `(errs, sum)`, branching on the result. ["12"; "abc"; "5"; ""; + "100"; "x"] → (3, 117) → 417. Exercises Either constructors used + bare (no qualification), char range comparison, tuple-pattern + destructuring on let-binding, recursive helper inside if-else. 36 + baseline programs total. - 2026-05-09 Phase 5.1 — word_freq.ml baseline (Map.Make on String, count distinct words → 8). Defines a StringOrd module + applies Map.Make to it. Folds the input through SMap.find_opt + SMap.add to From 39f4c7a9a86b490f53120e6cdf96e934407687a7 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 08:21:19 +0000 Subject: [PATCH 130/298] ocaml: phase 5.1 hanoi.ml baseline (Tower of Hanoi move count, n=10 -> 1023) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Classic doubly-recursive solution returning the move count: hanoi n from to via = if n = 0 then 0 else hanoi (n-1) from via to + 1 + hanoi (n-1) via to from For n = 10, returns 2^10 - 1 = 1023. Exercises 4-arg recursion, conditional base case, and tail-position addition. Uses 'to_' instead of 'to' for the destination param to avoid collision with the 'to' keyword in for-loops — the OCaml conventional workaround. 37 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/hanoi.ml | 11 +++++++++++ plans/ocaml-on-sx.md | 8 ++++++++ 3 files changed, 20 insertions(+) create mode 100644 lib/ocaml/baseline/hanoi.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 367541d2..6a61cd43 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -14,6 +14,7 @@ "factorial.ml": 3628800, "fraction.ml": 7, "frequency.ml": 5, + "hanoi.ml": 1023, "fizzbuzz.ml": 57, "list_ops.ml": 30, "json_pretty.ml": 24, diff --git a/lib/ocaml/baseline/hanoi.ml b/lib/ocaml/baseline/hanoi.ml new file mode 100644 index 00000000..a57cc1f5 --- /dev/null +++ b/lib/ocaml/baseline/hanoi.ml @@ -0,0 +1,11 @@ +let rec hanoi n from to_ via = + if n = 0 then 0 + else + let a = hanoi (n - 1) from via to_ in + let b = 1 in + let c = hanoi (n - 1) via to_ from in + a + b + c + +;; + +hanoi 10 1 3 2 diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 7ede6105..3e799773 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,14 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — hanoi.ml baseline (Tower of Hanoi move + count, n=10 → 1023). Classic doubly-recursive solution returning + the number of moves: `hanoi n from to via = hanoi (n-1) from via + to + 1 + hanoi (n-1) via to from`. Counts to 2^10 - 1 = 1023 for + n=10, exercising tail-position addition + 4-arg recursion + + conditional base case. (Uses `to_` instead of `to` to avoid + collision with the `to` keyword in for-loops — OCaml conventional + workaround.) 37 baseline programs total. - 2026-05-09 Phase 5.1 — validate.ml baseline (Either-based input validation, 3 errors × 100 + 117 sum = 417). validate_int returns `Left msg` on empty / non-digit, `Right (int_of_string s)` on a From 13fb1bd7a981dfd64bf92cccd33b5df71ffaa196 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 08:29:01 +0000 Subject: [PATCH 131/298] ocaml: phase 5.1 newton_sqrt.ml baseline (Newton's method, sqrt(2)*1000 = 1414) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Newton's method for square root: let sqrt_newton x = let g = ref 1.0 in for _ = 1 to 20 do g := (!g +. x /. !g) /. 2.0 done; !g 20 iterations is more than enough to converge for x=2 — result is ~1.414213562. Multiplied by 1000 and int_of_float'd: 1414. First baseline exercising: - for _ = 1 to N do ... done (wildcard loop variable) - pure float arithmetic with +. /. - the int_of_float truncate-toward-zero fix from iter 117 38 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/newton_sqrt.ml | 10 ++++++++++ plans/ocaml-on-sx.md | 7 +++++++ 3 files changed, 18 insertions(+) create mode 100644 lib/ocaml/baseline/newton_sqrt.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 6a61cd43..a11b46ce 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -23,6 +23,7 @@ "memo_fib.ml": 75025, "merge_sort.ml": 44, "module_use.ml": 3, + "newton_sqrt.ml": 1414, "mutable_record.ml": 10, "option_match.ml": 5, "pi_leibniz.ml": 314, diff --git a/lib/ocaml/baseline/newton_sqrt.ml b/lib/ocaml/baseline/newton_sqrt.ml new file mode 100644 index 00000000..51f95773 --- /dev/null +++ b/lib/ocaml/baseline/newton_sqrt.ml @@ -0,0 +1,10 @@ +let sqrt_newton x = + let g = ref 1.0 in + for _ = 1 to 20 do + g := (!g +. x /. !g) /. 2.0 + done; + !g + +;; + +int_of_float (sqrt_newton 2.0 *. 1000.0) diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 3e799773..f9651bcc 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — newton_sqrt.ml baseline (Newton's method + for sqrt, sqrt(2)*1000 truncated → 1414). 20 iterations of + `g := (g + x/g) / 2` converges to ~1.414213562 for x=2. Multiplied + by 1000 and int_of_float'd gives 1414. First baseline that + exercises `for _ = 1 to N do ... done` (wildcard loop variable), + pure float arithmetic with `+.` `/.`, and the `int_of_float` fix + from iteration 117. 38 baseline programs total. - 2026-05-09 Phase 5.1 — hanoi.ml baseline (Tower of Hanoi move count, n=10 → 1023). Classic doubly-recursive solution returning the number of moves: `hanoi n from to via = hanoi (n-1) from via From 095bb62ef91a21873e1da48affe28cc62e21a98f Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 08:39:56 +0000 Subject: [PATCH 132/298] ocaml: phase 5.1 rpn.ml baseline (Reverse Polish Notation evaluator, [3 4 + 2 * 5 -] = 9) Stack-based RPN evaluator: let eval_rpn tokens = let stack = Stack.create () in List.iter (fun tok -> if tok is operator then let b = Stack.pop stack in let a = Stack.pop stack in Stack.push (apply tok a b) stack else Stack.push (int_of_string tok) stack ) tokens; Stack.pop stack For tokens [3 4 + 2 * 5 -]: 3 4 + -> 7 7 2 * -> 14 14 5 - -> 9 Exercises Stack.create / push / pop, mixed branch on string equality, multi-arm if/else if for operator dispatch, int_of_string for token parsing. 39 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/rpn.ml | 20 ++++++++++++++++++++ plans/ocaml-on-sx.md | 7 +++++++ 3 files changed, 28 insertions(+) create mode 100644 lib/ocaml/baseline/rpn.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index a11b46ce..f2f5b5c2 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -31,6 +31,7 @@ "queens.ml": 2, "quicksort.ml": 44, "roman.ml": 44, + "rpn.ml": 9, "safe_div.ml": 20, "shuffle.ml": 55, "word_freq.ml": 8, diff --git a/lib/ocaml/baseline/rpn.ml b/lib/ocaml/baseline/rpn.ml new file mode 100644 index 00000000..d2fc7e94 --- /dev/null +++ b/lib/ocaml/baseline/rpn.ml @@ -0,0 +1,20 @@ +let eval_rpn tokens = + let stack = Stack.create () in + List.iter (fun tok -> + if tok = "+" || tok = "-" || tok = "*" || tok = "/" then begin + let b = Stack.pop stack in + let a = Stack.pop stack in + let r = if tok = "+" then a + b + else if tok = "-" then a - b + else if tok = "*" then a * b + else a / b + in + Stack.push r stack + end else + Stack.push (int_of_string tok) stack + ) tokens; + Stack.pop stack + +;; + +eval_rpn ["3"; "4"; "+"; "2"; "*"; "5"; "-"] diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index f9651bcc..a5c1e94f 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — rpn.ml baseline (Reverse Polish Notation + evaluator using Stack). Walks the token list with List.iter, pushes + ints onto the stack, on operator tokens pops two operands and + pushes the result. `[3 4 + 2 * 5 -]` evaluates as 3+4=7, 7*2=14, + 14-5=9 → 9. Exercises Stack.create / push / pop, mixed branch on + string equality, multi-arm if/else if, int_of_string for token + parsing. 39 baseline programs total. - 2026-05-09 Phase 5.1 — newton_sqrt.ml baseline (Newton's method for sqrt, sqrt(2)*1000 truncated → 1414). 20 iterations of `g := (g + x/g) / 2` converges to ~1.414213562 for x=2. Multiplied From 70b9b4f6cf38dfa6aff86eec1caa8c6b481ca5d3 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 08:50:12 +0000 Subject: [PATCH 133/298] ocaml: phase 5.1 ackermann.ml baseline (ack(3, 4) = 125) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Classic Ackermann function: let rec ack m n = if m = 0 then n + 1 else if n = 0 then ack (m - 1) 1 else ack (m - 1) (ack m (n - 1)) ack(3, 4) = 125, expanding to ~6700 evaluator frames — a useful stress test of the call stack and control transfer. Real OCaml evaluates this in milliseconds; ours takes ~2 minutes on a contended host but completes correctly. 40 baseline programs total. --- lib/ocaml/baseline/ackermann.ml | 8 ++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 7 +++++++ 3 files changed, 16 insertions(+) create mode 100644 lib/ocaml/baseline/ackermann.ml diff --git a/lib/ocaml/baseline/ackermann.ml b/lib/ocaml/baseline/ackermann.ml new file mode 100644 index 00000000..8964ab06 --- /dev/null +++ b/lib/ocaml/baseline/ackermann.ml @@ -0,0 +1,8 @@ +let rec ack m n = + if m = 0 then n + 1 + else if n = 0 then ack (m - 1) 1 + else ack (m - 1) (ack m (n - 1)) + +;; + +ack 3 4 diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index f2f5b5c2..dce3387c 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -1,4 +1,5 @@ { + "ackermann.ml": 125, "anagrams.ml": 3, "bag.ml": 3, "balance.ml": 3, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index a5c1e94f..c1386138 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — ackermann.ml baseline (Ackermann function, + ack(3, 4) = 125). Three-arm recursion: m=0 base, n=0 reduces m, + else doubly-nested recursion `ack (m-1) (ack m (n-1))`. ack(3, 4) + expands to ~6700 frames in our spec-level evaluator, so a useful + exercise of the call stack and control transfer. Real OCaml + evaluates the same in milliseconds; ours takes ~2 minutes on a + contended host but completes correctly. 40 baseline programs total. - 2026-05-09 Phase 5.1 — rpn.ml baseline (Reverse Polish Notation evaluator using Stack). Walks the token list with List.iter, pushes ints onto the stack, on operator tokens pops two operands and From aaaf05444147af9aae38cbc7da0414e6bbe2b98f Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 09:15:00 +0000 Subject: [PATCH 134/298] ocaml: phase 4 bitwise ops land/lor/lxor/lsl/lsr/asr + bits.ml baseline (+5 tests, 607 total) The binop precedence table already had land/lor/lxor/lsl/lsr/asr (iter 0 setup) but eval-op fell through to 'unknown operator' for all of them. SX doesn't expose host bitwise primitives, so each is implemented in eval.sx via arithmetic on the host: land/lor/lxor: mask & shift loop, accumulating 1< 0 do if m land 1 = 1 then ... ; m := m lsr 1 done'. Sum of popcount(1023, 5, 1024, 0xff) = 10 + 2 + 1 + 8 = 21. 5 land 3 = 1 5 lor 3 = 7 5 lxor 3 = 6 1 lsl 8 = 256 256 lsr 4 = 16 41 baseline programs total. --- lib/ocaml/baseline/bits.ml | 12 ++++++ lib/ocaml/baseline/expected.json | 1 + lib/ocaml/eval.sx | 73 ++++++++++++++++++++++++++++++++ lib/ocaml/test.sh | 19 +++++++++ plans/ocaml-on-sx.md | 8 ++++ 5 files changed, 113 insertions(+) create mode 100644 lib/ocaml/baseline/bits.ml diff --git a/lib/ocaml/baseline/bits.ml b/lib/ocaml/baseline/bits.ml new file mode 100644 index 00000000..e060a8e5 --- /dev/null +++ b/lib/ocaml/baseline/bits.ml @@ -0,0 +1,12 @@ +let popcount n = + let count = ref 0 in + let m = ref n in + while !m > 0 do + if !m land 1 = 1 then count := !count + 1; + m := !m lsr 1 + done; + !count + +;; + +popcount 1023 + popcount 5 + popcount 1024 + popcount 0xff diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index dce3387c..ee54648f 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -2,6 +2,7 @@ "ackermann.ml": 125, "anagrams.ml": 3, "bag.ml": 3, + "bits.ml": 21, "balance.ml": 3, "bfs.ml": 6, "btree.ml": 39, diff --git a/lib/ocaml/eval.sx b/lib/ocaml/eval.sx index f22357d7..71b2eabc 100644 --- a/lib/ocaml/eval.sx +++ b/lib/ocaml/eval.sx @@ -513,6 +513,79 @@ ((= op ">=") (>= lhs rhs)) ((= op "&&") (and lhs rhs)) ((= op "||") (or lhs rhs)) + ;; Bitwise ops — implemented via arithmetic since SX doesn't + ;; expose host bitwise primitives. + ((= op "land") + (let ((r 0) (f 1) (a lhs) (b rhs)) + (begin + (define loop + (fn () + (when (and (> a 0) (> b 0)) + (begin + (when (and (= (mod a 2) 1) (= (mod b 2) 1)) + (set! r (+ r f))) + (set! a (floor (/ a 2))) + (set! b (floor (/ b 2))) + (set! f (* f 2)) + (loop))))) + (loop) + r))) + ((= op "lor") + (let ((r 0) (f 1) (a lhs) (b rhs)) + (begin + (define loop + (fn () + (when (or (> a 0) (> b 0)) + (begin + (when (or (= (mod a 2) 1) (= (mod b 2) 1)) + (set! r (+ r f))) + (set! a (floor (/ a 2))) + (set! b (floor (/ b 2))) + (set! f (* f 2)) + (loop))))) + (loop) + r))) + ((= op "lxor") + (let ((r 0) (f 1) (a lhs) (b rhs)) + (begin + (define loop + (fn () + (when (or (> a 0) (> b 0)) + (begin + (when (not (= (mod a 2) (mod b 2))) + (set! r (+ r f))) + (set! a (floor (/ a 2))) + (set! b (floor (/ b 2))) + (set! f (* f 2)) + (loop))))) + (loop) + r))) + ((= op "lsl") + (let ((r lhs) (k rhs)) + (begin + (define loop + (fn () + (when (> k 0) (begin (set! r (* r 2)) (set! k (- k 1)) (loop))))) + (loop) + r))) + ((= op "lsr") + (let ((r lhs) (k rhs)) + (begin + (define loop + (fn () + (when (> k 0) + (begin (set! r (floor (/ r 2))) (set! k (- k 1)) (loop))))) + (loop) + r))) + ((= op "asr") + (let ((r lhs) (k rhs)) + (begin + (define loop + (fn () + (when (> k 0) + (begin (set! r (floor (/ r 2))) (set! k (- k 1)) (loop))))) + (loop) + r))) ((= op "or") (or lhs rhs)) ((= op "|>") (rhs lhs)) (else (error (str "ocaml-eval: unknown operator " op)))))) diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index 5bd5989a..0b516865 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1504,6 +1504,18 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 5253) (eval "(ocaml-run \"let t = Hashtbl.create 4 in Hashtbl.add t \\\"a\\\" 1; let t2 = Hashtbl.copy t in Hashtbl.add t2 \\\"b\\\" 2; Hashtbl.length t + Hashtbl.length t2\")") +;; ── Bitwise ops land/lor/lxor/lsl/lsr ──────────────────────── +(epoch 5260) +(eval "(ocaml-run \"5 land 3\")") +(epoch 5261) +(eval "(ocaml-run \"5 lor 3\")") +(epoch 5262) +(eval "(ocaml-run \"5 lxor 3\")") +(epoch 5263) +(eval "(ocaml-run \"1 lsl 8\")") +(epoch 5264) +(eval "(ocaml-run \"256 lsr 4\")") + EPOCHS OUTPUT=$(timeout 360 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -2392,6 +2404,13 @@ check 5251 "Either.fold Left 7+100" '107' check 5252 "Either.fold Right 7*10" '70' check 5253 "Hashtbl.copy independent" '3' +# ── Bitwise ops land/lor/lxor/lsl/lsr ─────────────────────────── +check 5260 "5 land 3 = 1" '1' +check 5261 "5 lor 3 = 7" '7' +check 5262 "5 lxor 3 = 6" '6' +check 5263 "1 lsl 8 = 256" '256' +check 5264 "256 lsr 4 = 16" '16' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index c1386138..b767af7c 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,14 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 4 — bitwise ops `land`/`lor`/`lxor`/`lsl`/`lsr`/ + `asr` + bits.ml baseline (popcount-sum = 21) (+5 tests, 607 total). + The binop precedence table already had these but eval-op fell + through to "unknown operator". Implemented in eval.sx via + arithmetic on the host (mod / floor / div) since SX doesn't expose + bitwise primitives. asr aliased to lsr (no sign extension at our + bit width). bits.ml exercises `m land 1` + `m lsr 1` inside a while + loop. 41 baseline programs total. - 2026-05-09 Phase 5.1 — ackermann.ml baseline (Ackermann function, ack(3, 4) = 125). Three-arm recursion: m=0 base, n=0 reduces m, else doubly-nested recursion `ack (m-1) (ack m (n-1))`. ack(3, 4) From b0cbdaf713c47680c55c908e25ee08ae33ab6566 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 09:24:41 +0000 Subject: [PATCH 135/298] ocaml: phase 5.1 pretty_table.ml baseline (Buffer + Printf widths, len = 64) Builds a 4-row scoreboard via Buffer + Printf.sprintf: Buffer.add_string buf (Printf.sprintf '%-10s %4d\n' name score) Each row is exactly 16 chars regardless of actual name/score length: 10 name padding + 1 space + 4 score padding + 1 newline. 4 rows -> 64 chars total. Combines: - Buffer.add_string + Printf.sprintf - %-Ns left-justified string and %Nd right-justified int width - List.iter on tuple-pattern args (iter 101 fun (a, b)) 42 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/pretty_table.ml | 10 ++++++++++ plans/ocaml-on-sx.md | 8 ++++++++ 3 files changed, 19 insertions(+) create mode 100644 lib/ocaml/baseline/pretty_table.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index ee54648f..ae92cffc 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -29,6 +29,7 @@ "mutable_record.ml": 10, "option_match.ml": 5, "pi_leibniz.ml": 314, + "pretty_table.ml": 64, "poly_stack.ml": 5, "queens.ml": 2, "quicksort.ml": 44, diff --git a/lib/ocaml/baseline/pretty_table.ml b/lib/ocaml/baseline/pretty_table.ml new file mode 100644 index 00000000..a3df8e3c --- /dev/null +++ b/lib/ocaml/baseline/pretty_table.ml @@ -0,0 +1,10 @@ +let table rows = + let buf = Buffer.create 64 in + List.iter (fun (name, score) -> + Buffer.add_string buf (Printf.sprintf "%-10s %4d\n" name score) + ) rows; + Buffer.contents buf + +;; + +String.length (table [("alice", 95); ("bob", 67); ("carol", 100); ("dave", 8)]) diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index b767af7c..f4747065 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,14 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — pretty_table.ml baseline (Buffer + Printf + width specifiers, total length 64). Builds a 4-row scoreboard via + Buffer + `Printf.sprintf "%-10s %4d\n"`. Each row is exactly 16 + chars (10 name + 1 space + 4 score + 1 newline) regardless of + actual content length thanks to width padding. 4 rows = 64 chars. + Combines Buffer.add_string + Printf.sprintf with `%-Ns` / + `%Nd` width specifiers + List.iter on tuple-pattern args. 42 + baseline programs total. - 2026-05-09 Phase 4 — bitwise ops `land`/`lor`/`lxor`/`lsl`/`lsr`/ `asr` + bits.ml baseline (popcount-sum = 21) (+5 tests, 607 total). The binop precedence table already had these but eval-op fell From 3be2dc6e78240f615ee7b3f00acf22e26e2afa7c Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 09:34:40 +0000 Subject: [PATCH 136/298] ocaml: phase 5.1 grep_count.ml baseline (substring-aware line filter, 3 matches) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Defines a recursive str_contains that walks the haystack with String.sub to find a needle substring. Real OCaml's String.contains only accepts a single char, so this baseline implements its own substring search to stay portable. let rec str_contains s sub i = if i + sl > nl then false else if String.sub s i sl = sub then true else str_contains s sub (i + 1) count_matching splits text on newlines, folds with the predicate. 'the quick brown fox\nfox runs fast\nthe dog\nfoxes are clever' needle = 'fox' matches = 3 (lines 1, 2, 4 — 'foxes' contains 'fox') 43 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/grep_count.ml | 17 +++++++++++++++++ plans/ocaml-on-sx.md | 7 +++++++ 3 files changed, 25 insertions(+) create mode 100644 lib/ocaml/baseline/grep_count.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index ae92cffc..b86add6d 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -16,6 +16,7 @@ "factorial.ml": 3628800, "fraction.ml": 7, "frequency.ml": 5, + "grep_count.ml": 3, "hanoi.ml": 1023, "fizzbuzz.ml": 57, "list_ops.ml": 30, diff --git a/lib/ocaml/baseline/grep_count.ml b/lib/ocaml/baseline/grep_count.ml new file mode 100644 index 00000000..7b341f57 --- /dev/null +++ b/lib/ocaml/baseline/grep_count.ml @@ -0,0 +1,17 @@ +let rec str_contains s sub i = + let nl = String.length s in + let sl = String.length sub in + if i + sl > nl then false + else if String.sub s i sl = sub then true + else str_contains s sub (i + 1) + +let count_matching needle text = + let lines = String.split_on_char '\n' text in + List.fold_left (fun acc line -> + if str_contains line needle 0 then acc + 1 + else acc + ) 0 lines + +;; + +count_matching "fox" "the quick brown fox\nfox runs fast\nthe dog\nfoxes are clever" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index f4747065..23cb7bd0 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — grep_count.ml baseline (substring-aware + line filter, 3 lines match). Defines a recursive `str_contains` + that walks the haystack with `String.sub` slices to find a needle + substring (real OCaml's `String.contains` only takes a char). + Splits text on `'\n'` then folds with the contains predicate. Test + text has 4 lines, 3 contain 'fox' (incl 'foxes'). 43 baseline + programs total. - 2026-05-09 Phase 5.1 — pretty_table.ml baseline (Buffer + Printf width specifiers, total length 64). Builds a 4-row scoreboard via Buffer + `Printf.sprintf "%-10s %4d\n"`. Each row is exactly 16 From 32aba1823db4fcf6474d973145a0424d52f8cb5d Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 09:47:25 +0000 Subject: [PATCH 137/298] ocaml: phase 5.1 run_length.ml baseline (RLE, sum-of-counts = 11) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Run-length encoding via tail-recursive 4-arg accumulator: let rle xs = let rec aux xs cur n acc = match xs with | [] -> List.rev ((cur, n) :: acc) | h :: t -> if h = cur then aux t cur (n + 1) acc else aux t h 1 ((cur, n) :: acc) in match xs with | [] -> [] | h :: t -> aux t h 1 [] rle [1;1;1;2;2;3;3;3;3;1;1] = [(1,3);(2,2);(3,4);(1,2)] sum of counts = 11 (matches input length) The sum-of-counts test verifies that the encoding preserves total length — drops or duplicates would diverge. 44 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/run_length.ml | 16 ++++++++++++++++ plans/ocaml-on-sx.md | 7 +++++++ 3 files changed, 24 insertions(+) create mode 100644 lib/ocaml/baseline/run_length.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index b86add6d..d4382a4c 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -36,6 +36,7 @@ "quicksort.ml": 44, "roman.ml": 44, "rpn.ml": 9, + "run_length.ml": 11, "safe_div.ml": 20, "shuffle.ml": 55, "word_freq.ml": 8, diff --git a/lib/ocaml/baseline/run_length.ml b/lib/ocaml/baseline/run_length.ml new file mode 100644 index 00000000..c2ea6737 --- /dev/null +++ b/lib/ocaml/baseline/run_length.ml @@ -0,0 +1,16 @@ +let rle xs = + let rec aux xs cur n acc = + match xs with + | [] -> List.rev ((cur, n) :: acc) + | h :: t -> + if h = cur then aux t cur (n + 1) acc + else aux t h 1 ((cur, n) :: acc) + in + match xs with + | [] -> [] + | h :: t -> aux t h 1 [] + +;; + +List.fold_left (fun acc (_, n) -> acc + n) 0 + (rle [1;1;1;2;2;3;3;3;3;1;1]) diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 23cb7bd0..f0fd71d6 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — run_length.ml baseline (run-length encoding, + sum of counts = 11). RLE encodes [1;1;1;2;2;3;3;3;3;1;1] as + [(1,3);(2,2);(3,4);(1,2)]. Sum-of-counts = 11 verifies that the + encoding preserves total length. Tail-recursive accumulator with + 4-arg helper, two-arm dispatch on whether the next element matches + the current run head, List.rev to restore order, fold_left with + tuple-pattern fun. 44 baseline programs total. - 2026-05-09 Phase 5.1 — grep_count.ml baseline (substring-aware line filter, 3 lines match). Defines a recursive `str_contains` that walks the haystack with `String.sub` slices to find a needle From da6d8e39c982a3e0b9fcfe904cbed6b848c76510 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 09:57:18 +0000 Subject: [PATCH 138/298] ocaml: phase 5.1 pascal.ml baseline (Pascal triangle row 10 middle = C(10,5) = 252) next_row prepends 1, walks adjacent pairs (x, y) emitting x+y, appends a final 1: let rec next_row prev = let rec aux a = match a with | [_] -> [1] | x :: y :: rest -> (x + y) :: aux (y :: rest) | [] -> [] in 1 :: aux prev row n iterates next_row n times starting from [1] using a ref + 'for _ = 1 to n do r := next_row !r done'. row 10 = [1;10;45;120;210;252;210;120;45;10;1] List.nth (row 10) 5 = 252 = C(10, 5) Exercises three-arm match including [_] singleton wildcard, x :: y :: rest binding, and the for-loop with wildcard counter. 45 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/pascal.ml | 19 +++++++++++++++++++ plans/ocaml-on-sx.md | 7 +++++++ 3 files changed, 27 insertions(+) create mode 100644 lib/ocaml/baseline/pascal.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index d4382a4c..f0724929 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -29,6 +29,7 @@ "newton_sqrt.ml": 1414, "mutable_record.ml": 10, "option_match.ml": 5, + "pascal.ml": 252, "pi_leibniz.ml": 314, "pretty_table.ml": 64, "poly_stack.ml": 5, diff --git a/lib/ocaml/baseline/pascal.ml b/lib/ocaml/baseline/pascal.ml new file mode 100644 index 00000000..b421f791 --- /dev/null +++ b/lib/ocaml/baseline/pascal.ml @@ -0,0 +1,19 @@ +let rec next_row prev = + let rec aux a = + match a with + | [_] -> [1] + | x :: y :: rest -> (x + y) :: aux (y :: rest) + | [] -> [] + in + 1 :: aux prev + +let row n = + let r = ref [1] in + for _ = 1 to n do + r := next_row !r + done; + !r + +;; + +List.nth (row 10) 5 diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index f0fd71d6..e263bf85 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — pascal.ml baseline (Pascal's triangle row + 10 middle = C(10, 5) = 252). next_row prepends 1, walks adjacent + pairs (x, y) emitting x+y, appends a final 1. row n iterates + next_row n times starting from [1]. Three-arm match including + `[_]` (singleton wildcard) and `x :: y :: rest`. Iteration via + `for _ = 1 to n do r := next_row !r done`. 45 baseline programs + total. - 2026-05-09 Phase 5.1 — run_length.ml baseline (run-length encoding, sum of counts = 11). RLE encodes [1;1;1;2;2;3;3;3;3;1;1] as [(1,3);(2,2);(3,4);(1,2)]. Sum-of-counts = 11 verifies that the From b3d5da53610cfec61aec473aeb08d22d5402d20e Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 10:07:12 +0000 Subject: [PATCH 139/298] ocaml: phase 5.1 kadane.ml baseline (max subarray sum = 6) Kadane's algorithm in O(n): let max_subarray xs = let max_so_far = ref min_int in let cur = ref 0 in List.iter (fun x -> cur := max x (!cur + x); max_so_far := max !max_so_far !cur ) xs; !max_so_far For [-2;1;-3;4;-1;2;1;-5;4] the optimal subarray is [4;-1;2;1] = 6. Exercises min_int (iter 94), max as global, ref / ! / :=, and List.iter with two side-effecting steps in one closure body. 46 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/kadane.ml | 12 ++++++++++++ plans/ocaml-on-sx.md | 5 +++++ 3 files changed, 18 insertions(+) create mode 100644 lib/ocaml/baseline/kadane.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index f0724929..5e4f9195 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -21,6 +21,7 @@ "fizzbuzz.ml": 57, "list_ops.ml": 30, "json_pretty.ml": 24, + "kadane.ml": 6, "lambda_calc.ml": 7, "levenshtein.ml": 11, "memo_fib.ml": 75025, diff --git a/lib/ocaml/baseline/kadane.ml b/lib/ocaml/baseline/kadane.ml new file mode 100644 index 00000000..49774818 --- /dev/null +++ b/lib/ocaml/baseline/kadane.ml @@ -0,0 +1,12 @@ +let max_subarray xs = + let max_so_far = ref min_int in + let cur = ref 0 in + List.iter (fun x -> + cur := max x (!cur + x); + max_so_far := max !max_so_far !cur + ) xs; + !max_so_far + +;; + +max_subarray [-2; 1; -3; 4; -1; 2; 1; -5; 4] diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index e263bf85..aabe7b2e 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,11 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — kadane.ml baseline (Kadane's max subarray + sum = 6). Classic O(n) algorithm using two refs and `max`. For + [-2;1;-3;4;-1;2;1;-5;4] the optimal subarray is [4;-1;2;1] = 6. + Exercises `min_int`, `max`, ref/!/:=, and List.iter with multiple + side-effecting steps in one closure body. 46 baseline programs total. - 2026-05-09 Phase 5.1 — pascal.ml baseline (Pascal's triangle row 10 middle = C(10, 5) = 252). next_row prepends 1, walks adjacent pairs (x, y) emitting x+y, appends a final 1. row n iterates From aee7226b9c65009c48b0d100c8b682ac651ebc48 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 10:21:11 +0000 Subject: [PATCH 140/298] ocaml: phase 5.1 coin_change.ml baseline (DP, 67c with [1;5;10;25] = 6 coins) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bottom-up dynamic programming. dp[i] = minimum coins to make amount i. let dp = Array.make (target + 1) (target + 1) in (* sentinel *) dp.(0) <- 0; for i = 1 to target do List.iter (fun c -> if c <= i && dp.(i - c) + 1 < dp.(i) then dp.(i) <- dp.(i - c) + 1 ) coins done Sentinel 'target + 1' means impossible — any real solution uses at most 'target' coins. coin_change [1; 5; 10; 25] 67 = 6 (= 25+25+10+5+1+1) Exercises Array.make + arr.(i) + arr.(i) <- v + nested for/List.iter + guard 'c <= i'. 47 baseline programs total. --- lib/ocaml/baseline/coin_change.ml | 15 +++++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 8 ++++++++ 3 files changed, 24 insertions(+) create mode 100644 lib/ocaml/baseline/coin_change.ml diff --git a/lib/ocaml/baseline/coin_change.ml b/lib/ocaml/baseline/coin_change.ml new file mode 100644 index 00000000..5743a879 --- /dev/null +++ b/lib/ocaml/baseline/coin_change.ml @@ -0,0 +1,15 @@ +let coin_change coins target = + let dp = Array.make (target + 1) (target + 1) in + dp.(0) <- 0; + for i = 1 to target do + List.iter (fun c -> + if c <= i && dp.(i - c) + 1 < dp.(i) then + dp.(i) <- dp.(i - c) + 1 + ) coins + done; + if dp.(target) > target then -1 + else dp.(target) + +;; + +coin_change [1; 5; 10; 25] 67 diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 5e4f9195..6e0e9145 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -10,6 +10,7 @@ "caesar.ml": 215, "calc.ml": 13, "closures.ml": 315, + "coin_change.ml": 6, "csv.ml": 10, "exception_handle.ml": 4, "expr_eval.ml": 16, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index aabe7b2e..0d6bb898 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,14 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — coin_change.ml baseline (min coin DP, 67¢ + with [1;5;10;25] → 6 coins). Bottom-up DP: `dp[i]` = min coins + for amount i. For each amount 1..target, iterate coins and + relax `dp[i] = min(dp[i], dp[i-c] + 1)`. Sentinel `target + 1` + represents "impossible" since any real solution uses at most + target coins. 67 = 25+25+10+5+1+1 = 6 coins. Exercises + Array.make + arr.(i) + arr.(i) <- v + nested for/List.iter + + guard `c <= i`. 47 baseline programs total. - 2026-05-09 Phase 5.1 — kadane.ml baseline (Kadane's max subarray sum = 6). Classic O(n) algorithm using two refs and `max`. For [-2;1;-3;4;-1;2;1;-5;4] the optimal subarray is [4;-1;2;1] = 6. From 073ea44fdbd52f9059ec715a8fdce3f74f4c490c Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 10:31:22 +0000 Subject: [PATCH 141/298] ocaml: phase 5.1 palindrome.ml baseline (two-pointer check, 4/6 inputs match) Two-pointer palindrome check: let is_palindrome s = let n = String.length s in let rec check i j = if i >= j then true else if s.[i] <> s.[j] then false else check (i + 1) (j - 1) in check 0 (n - 1) Tests on six strings: racecar = true hello = false abba = true '' = true (vacuously, i >= j on entry) 'a' = true 'ab' = false Sum = 4. Uses s.[i] <> s.[j] (string-get + structural inequality), recursive 2-arg pointer advancement, and a multi-clause if/else if/else for the three cases. 48 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/palindrome.ml | 17 +++++++++++++++++ plans/ocaml-on-sx.md | 7 +++++++ 3 files changed, 25 insertions(+) create mode 100644 lib/ocaml/baseline/palindrome.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 6e0e9145..cecf44fb 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -31,6 +31,7 @@ "newton_sqrt.ml": 1414, "mutable_record.ml": 10, "option_match.ml": 5, + "palindrome.ml": 4, "pascal.ml": 252, "pi_leibniz.ml": 314, "pretty_table.ml": 64, diff --git a/lib/ocaml/baseline/palindrome.ml b/lib/ocaml/baseline/palindrome.ml new file mode 100644 index 00000000..6286e886 --- /dev/null +++ b/lib/ocaml/baseline/palindrome.ml @@ -0,0 +1,17 @@ +let is_palindrome s = + let n = String.length s in + let rec check i j = + if i >= j then true + else if s.[i] <> s.[j] then false + else check (i + 1) (j - 1) + in + check 0 (n - 1) + +;; + +(if is_palindrome "racecar" then 1 else 0) + +(if is_palindrome "hello" then 1 else 0) + +(if is_palindrome "abba" then 1 else 0) + +(if is_palindrome "" then 1 else 0) + +(if is_palindrome "a" then 1 else 0) + +(if is_palindrome "ab" then 1 else 0) diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 0d6bb898..63949809 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — palindrome.ml baseline (two-pointer + palindrome check, 4 of 6 inputs are palindromes). is_palindrome + walks from both ends meeting in the middle, returning false on + first mismatch. Tests on six strings (racecar ✓, hello ✗, abba ✓, + "" ✓, "a" ✓, "ab" ✗) sum to 4. Uses `s.[i] <> s.[j]` (string-get + + structural inequality) and recursive 2-arg pointer advancement. + 48 baseline programs total. - 2026-05-09 Phase 5.1 — coin_change.ml baseline (min coin DP, 67¢ with [1;5;10;25] → 6 coins). Bottom-up DP: `dp[i]` = min coins for amount i. For each amount 1..target, iterate coins and From a91ff62730e0e540fdabb5e9b6ede3f2712cd8e3 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 10:40:49 +0000 Subject: [PATCH 142/298] ocaml: phase 5.1 bsearch.ml baseline (binary search, position sum = 7) Iterative binary search on a sorted int array: let bsearch arr target = let n = Array.length arr in let lo = ref 0 and hi = ref (n - 1) in let found = ref (-1) in while !lo <= !hi && !found = -1 do let mid = (!lo + !hi) / 2 in if arr.(mid) = target then found := mid else if arr.(mid) < target then lo := mid + 1 else hi := mid - 1 done; !found For [1;3;5;7;9;11;13;15;17;19;21]: bsearch a 13 = 6 bsearch a 5 = 2 bsearch a 100 = -1 sum = 7 Exercises Array.of_list + arr.(i) + multi-let 'let lo = ... and hi = ...' + while + multi-arm if/else if/else. 49 baseline programs total. --- lib/ocaml/baseline/bsearch.ml | 16 ++++++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 7 +++++++ 3 files changed, 24 insertions(+) create mode 100644 lib/ocaml/baseline/bsearch.ml diff --git a/lib/ocaml/baseline/bsearch.ml b/lib/ocaml/baseline/bsearch.ml new file mode 100644 index 00000000..fb2df41f --- /dev/null +++ b/lib/ocaml/baseline/bsearch.ml @@ -0,0 +1,16 @@ +let bsearch arr target = + let n = Array.length arr in + let lo = ref 0 and hi = ref (n - 1) in + let found = ref (-1) in + while !lo <= !hi && !found = -1 do + let mid = (!lo + !hi) / 2 in + if arr.(mid) = target then found := mid + else if arr.(mid) < target then lo := mid + 1 + else hi := mid - 1 + done; + !found + +;; + +let a = Array.of_list [1;3;5;7;9;11;13;15;17;19;21] in +bsearch a 13 + bsearch a 5 + bsearch a 100 diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index cecf44fb..b97ffddf 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -7,6 +7,7 @@ "bfs.ml": 6, "btree.ml": 39, "brainfuck.ml": 75, + "bsearch.ml": 7, "caesar.ml": 215, "calc.ml": 13, "closures.ml": 315, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 63949809..5a516692 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — bsearch.ml baseline (binary search, + position-sum 6 + 2 + (-1) = 7). Iterative bsearch on a sorted int + array using two index refs `lo`/`hi` and a sentinel `found = -1`. + while loop runs until `lo > hi` or found set. For [1;3;...;21]: + position of 13 = 6, 5 = 2, 100 = -1. Exercises Array.of_list + + arr.(i) + multi-let `let lo = ... and hi = ...` + while + multi-arm + if/else if. 49 baseline programs total. - 2026-05-09 Phase 5.1 — palindrome.ml baseline (two-pointer palindrome check, 4 of 6 inputs are palindromes). is_palindrome walks from both ends meeting in the middle, returning false on From bafa2410e494df30c8cd95263ce9df486e771e50 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 11:00:00 +0000 Subject: [PATCH 143/298] ocaml: phase 5.1 mat_mul.ml baseline (3x3 row-major matrix multiply, sum = 621) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Triple-nested for loop with row-major indexing: for i = 0 to n - 1 do for j = 0 to n - 1 do for k = 0 to n - 1 do c.(i * n + j) <- c.(i * n + j) + a.(i * n + k) * b.(k * n + j) done done done For 3x3 matrices A=[[1..9]] and B=[[9..1]], the resulting C has sum 621. Tests deeply nested for loops on Array, Array.make + arr.(i) + arr.(i) <- v + Array.fold_left. 50 baseline programs total — milestone. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/mat_mul.ml | 18 ++++++++++++++++++ plans/ocaml-on-sx.md | 7 +++++++ 3 files changed, 26 insertions(+) create mode 100644 lib/ocaml/baseline/mat_mul.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index b97ffddf..35b76e82 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -22,6 +22,7 @@ "hanoi.ml": 1023, "fizzbuzz.ml": 57, "list_ops.ml": 30, + "mat_mul.ml": 621, "json_pretty.ml": 24, "kadane.ml": 6, "lambda_calc.ml": 7, diff --git a/lib/ocaml/baseline/mat_mul.ml b/lib/ocaml/baseline/mat_mul.ml new file mode 100644 index 00000000..e7593f4f --- /dev/null +++ b/lib/ocaml/baseline/mat_mul.ml @@ -0,0 +1,18 @@ +let mat_mul a b n = + let c = Array.make (n * n) 0 in + for i = 0 to n - 1 do + for j = 0 to n - 1 do + for k = 0 to n - 1 do + c.(i * n + j) <- c.(i * n + j) + a.(i * n + k) * b.(k * n + j) + done + done + done; + c + +;; + +let n = 3 in +let a = Array.of_list [1;2;3; 4;5;6; 7;8;9] in +let b = Array.of_list [9;8;7; 6;5;4; 3;2;1] in +let c = mat_mul a b n in +Array.fold_left (+) 0 c diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 5a516692..68e5a959 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — mat_mul.ml baseline (3x3 row-major matrix + multiply, sum of result = 621). Triple-nested for loop over + i / j / k with row-major indexing `c.(i * n + j) <- c.(i * n + j) + + a.(i * n + k) * b.(k * n + j)`. Tests deeply nested for loops on + Array, Array.make + arr.(i) + arr.(i) <- v + Array.fold_left, and + multi-arg let chains with intermediate Array bindings. 50 baseline + programs total — milestone. - 2026-05-09 Phase 5.1 — bsearch.ml baseline (binary search, position-sum 6 + 2 + (-1) = 7). Iterative bsearch on a sorted int array using two index refs `lo`/`hi` and a sentinel `found = -1`. From aa0a7fa1a24907e040ecb5f4e1b27dce45e04ddb Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 11:09:23 +0000 Subject: [PATCH 144/298] ocaml: phase 5.1 expr_simp.ml baseline (symbolic simplifier, eval(simp e) = 22) Recursive ADT with three constructors (Num/Add/Mul). simp does bottom-up rewrite using algebraic identities: x + 0 -> x 0 + x -> x x * 0 -> 0 0 * x -> 0 x * 1 -> x 1 * x -> x constant folding for Num + Num and Num * Num Uses tuple pattern in nested match: 'match (simp a, simp b) with'. Add (Mul (Num 3, Num 5), Add (Num 0, Mul (Num 1, Num 7))) -> simp -> Add (Num 15, Num 7) -> eval -> 22 51 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/expr_simp.ml | 33 ++++++++++++++++++++++++++++++++ plans/ocaml-on-sx.md | 8 ++++++++ 3 files changed, 42 insertions(+) create mode 100644 lib/ocaml/baseline/expr_simp.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 35b76e82..26b0e10a 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -15,6 +15,7 @@ "csv.ml": 10, "exception_handle.ml": 4, "expr_eval.ml": 16, + "expr_simp.ml": 22, "factorial.ml": 3628800, "fraction.ml": 7, "frequency.ml": 5, diff --git a/lib/ocaml/baseline/expr_simp.ml b/lib/ocaml/baseline/expr_simp.ml new file mode 100644 index 00000000..a68c65b6 --- /dev/null +++ b/lib/ocaml/baseline/expr_simp.ml @@ -0,0 +1,33 @@ +type expr = + | Num of int + | Add of expr * expr + | Mul of expr * expr + +let rec simp e = + match e with + | Num n -> Num n + | Add (a, b) -> + (match (simp a, simp b) with + | (Num 0, x) -> x + | (x, Num 0) -> x + | (Num n, Num m) -> Num (n + m) + | (a', b') -> Add (a', b')) + | Mul (a, b) -> + (match (simp a, simp b) with + | (Num 0, _) -> Num 0 + | (_, Num 0) -> Num 0 + | (Num 1, x) -> x + | (x, Num 1) -> x + | (Num n, Num m) -> Num (n * m) + | (a', b') -> Mul (a', b')) + +let rec eval e = + match e with + | Num n -> n + | Add (a, b) -> eval a + eval b + | Mul (a, b) -> eval a * eval b + +;; + +let e = Add (Mul (Num 3, Num 5), Add (Num 0, Mul (Num 1, Num 7))) in +eval (simp e) diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 68e5a959..8c117cba 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,14 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — expr_simp.ml baseline (symbolic expression + simplifier, eval (simp e) = 22). Recursive ADT with three + constructors (Num/Add/Mul). simp does bottom-up rewrite using + algebraic identities: x+0 → x, 0+x → x, x*0 → 0, 0*x → 0, x*1 → x, + 1*x → x, constant folding both. Uses tuple pattern in nested match + (`match (simp a, simp b) with`). For `Add (Mul (Num 3, Num 5), + Add (Num 0, Mul (Num 1, Num 7)))` → simp → `Add (Num 15, Num 7)` + → eval → 22. 51 baseline programs total. - 2026-05-09 Phase 5.1 — mat_mul.ml baseline (3x3 row-major matrix multiply, sum of result = 621). Triple-nested for loop over i / j / k with row-major indexing `c.(i * n + j) <- c.(i * n + j) + From ac19b7aced499c5deded800b62edba8f0484dc70 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 11:19:23 +0000 Subject: [PATCH 145/298] ocaml: phase 5.1 bigint_add.ml baseline (digit-list bignum add, 1+18+9 = 28) Recursive 4-arm match on (a, b) tuples threading a carry: match (a, b) with | ([], []) -> if carry = 0 then [] else [carry] | (x :: xs, []) -> (s mod 10) :: aux xs [] (s / 10) where s = x + carry | ([], y :: ys) -> ... | (x :: xs, y :: ys) -> ... where s = x + y + carry Little-endian digit lists. Three tests: [9;9;9] + [1] = [0;0;0;1] (=1000, digit sum 1) [5;6;7] + [8;9;1] = [3;6;9] (=963, digit sum 18) [9;9;9;9;9;9;9;9] + [1] length 9 (carry propagates 8x) Sum = 1 + 18 + 9 = 28. Exercises tuple-pattern match on nested list-cons with the integer arithmetic and carry-threading idiom typical of multi-precision implementations. 52 baseline programs total. --- lib/ocaml/baseline/bigint_add.ml | 24 ++++++++++++++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 9 +++++++++ 3 files changed, 34 insertions(+) create mode 100644 lib/ocaml/baseline/bigint_add.ml diff --git a/lib/ocaml/baseline/bigint_add.ml b/lib/ocaml/baseline/bigint_add.ml new file mode 100644 index 00000000..87a5a195 --- /dev/null +++ b/lib/ocaml/baseline/bigint_add.ml @@ -0,0 +1,24 @@ +let bigint_add a b = + let rec aux a b carry = + match (a, b) with + | ([], []) -> if carry = 0 then [] else [carry] + | (x :: xs, []) -> + let s = x + carry in + (s mod 10) :: aux xs [] (s / 10) + | ([], y :: ys) -> + let s = y + carry in + (s mod 10) :: aux [] ys (s / 10) + | (x :: xs, y :: ys) -> + let s = x + y + carry in + (s mod 10) :: aux xs ys (s / 10) + in + aux a b 0 + +;; + +let r1 = bigint_add [9;9;9] [1] in +let r2 = bigint_add [5;6;7] [8;9;1] in +let r3 = bigint_add [9;9;9;9;9;9;9;9] [1] in +List.fold_left (+) 0 r1 + + List.fold_left (+) 0 r2 + + List.length r3 diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 26b0e10a..3f93f13d 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -2,6 +2,7 @@ "ackermann.ml": 125, "anagrams.ml": 3, "bag.ml": 3, + "bigint_add.ml": 28, "bits.ml": 21, "balance.ml": 3, "bfs.ml": 6, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 8c117cba..8fdf4adc 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,15 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — bigint_add.ml baseline (digit-list big-num + add, 28 = 1+18+9). Recursive 4-arm match on `(a, b)` tuples + threading a carry: `(x::xs, y::ys) -> (s mod 10) :: aux xs ys (s + / 10)`. Three test cases: + bigint_add [9;9;9] [1] = [0;0;0;1] (digit sum 1) + bigint_add [5;6;7] [8;9;1] = [3;6;9] (digit sum 18, 765+198=963) + bigint_add [9;9;9;9;9;9;9;9] [1] length 9 (carry propagates 8 places) + Sum: 1 + 18 + 9 = 28. Exercises tuple-pattern match on nested + list-cons + integer arithmetic. 52 baseline programs total. - 2026-05-09 Phase 5.1 — expr_simp.ml baseline (symbolic expression simplifier, eval (simp e) = 22). Recursive ADT with three constructors (Num/Add/Mul). simp does bottom-up rewrite using From b8dfc080dd90fb183857f129c70ae15edf64ce9f Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 11:33:30 +0000 Subject: [PATCH 146/298] ocaml: phase 5.1 zip_unzip.ml baseline (zip/unzip round-trip, sum-product = 1000) zip walks both lists in lockstep, truncating at the shorter. unzip uses tuple-pattern destructuring on the recursive result. let pairs = zip [1;2;3;4] [10;20;30;40] in let (xs, ys) = unzip pairs in List.fold_left (+) 0 xs * List.fold_left (+) 0 ys = 10 * 100 = 1000 Exercises: - tuple-cons patterns in match scrutinee: 'match (xs, ys) with' - tuple constructor in return value: '(a :: la, b :: lb)' - the iter-98 let-tuple destructuring: 'let (la, lb) = unzip rest' 53 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/zip_unzip.ml | 18 ++++++++++++++++++ plans/ocaml-on-sx.md | 9 +++++++++ 3 files changed, 28 insertions(+) create mode 100644 lib/ocaml/baseline/zip_unzip.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 3f93f13d..2f291158 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -48,6 +48,7 @@ "safe_div.ml": 20, "shuffle.ml": 55, "word_freq.ml": 8, + "zip_unzip.ml": 1000, "sieve.ml": 15, "sum_squares.ml": 385, "unique_set.ml": 9, diff --git a/lib/ocaml/baseline/zip_unzip.ml b/lib/ocaml/baseline/zip_unzip.ml new file mode 100644 index 00000000..b64787db --- /dev/null +++ b/lib/ocaml/baseline/zip_unzip.ml @@ -0,0 +1,18 @@ +let rec zip xs ys = + match (xs, ys) with + | ([], _) -> [] + | (_, []) -> [] + | (x :: xs', y :: ys') -> (x, y) :: zip xs' ys' + +let rec unzip pairs = + match pairs with + | [] -> ([], []) + | (a, b) :: rest -> + let (la, lb) = unzip rest in + (a :: la, b :: lb) + +;; + +let pairs = zip [1;2;3;4] [10;20;30;40] in +let (xs, ys) = unzip pairs in +List.fold_left (+) 0 xs * List.fold_left (+) 0 ys diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 8fdf4adc..82eee160 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,15 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — zip_unzip.ml baseline (list zip/unzip + round-trip, sum-product = 1000). zip walks both lists in lockstep + truncating at the shorter; unzip uses tuple-pattern destructuring + on the recursive result. After zip [1;2;3;4] [10;20;30;40] + + unzip, sums are 10 and 100 → product 1000. Exercises tuple-cons + patterns in match scrutinee `(xs, ys)`, tuple constructor in + return value `(a :: la, b :: lb)`, and the iter-98 let-tuple + destructuring `let (la, lb) = unzip rest in`. 53 baseline programs + total. - 2026-05-09 Phase 5.1 — bigint_add.ml baseline (digit-list big-num add, 28 = 1+18+9). Recursive 4-arm match on `(a, b)` tuples threading a carry: `(x::xs, y::ys) -> (s mod 10) :: aux xs ys (s From cca3a2820675134c584539bd6aa8bdba3f92a42a Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 11:42:52 +0000 Subject: [PATCH 147/298] ocaml: phase 5.1 gcd_lcm.ml baseline (Euclidean gcd + lcm, 12+12+36 = 60) Two-line baseline: let rec gcd a b = if b = 0 then a else gcd b (a mod b) let lcm a b = a * b / gcd a b gcd 36 48 = 12 lcm 4 6 = 12 lcm 12 18 = 36 sum = 60 Tests mod arithmetic and the integer-division fix from iteration 94 (without truncate-toward-zero, 'lcm 4 6 = 4 * 6 / 2 = 12.0' rather than the expected 12). 54 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/gcd_lcm.ml | 6 ++++++ plans/ocaml-on-sx.md | 6 ++++++ 3 files changed, 13 insertions(+) create mode 100644 lib/ocaml/baseline/gcd_lcm.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 2f291158..140edeea 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -20,6 +20,7 @@ "factorial.ml": 3628800, "fraction.ml": 7, "frequency.ml": 5, + "gcd_lcm.ml": 60, "grep_count.ml": 3, "hanoi.ml": 1023, "fizzbuzz.ml": 57, diff --git a/lib/ocaml/baseline/gcd_lcm.ml b/lib/ocaml/baseline/gcd_lcm.ml new file mode 100644 index 00000000..5b958db0 --- /dev/null +++ b/lib/ocaml/baseline/gcd_lcm.ml @@ -0,0 +1,6 @@ +let rec gcd a b = if b = 0 then a else gcd b (a mod b) + +let lcm a b = a * b / gcd a b +;; + +gcd 36 48 + lcm 4 6 + lcm 12 18 diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 82eee160..377f0165 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,12 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — gcd_lcm.ml baseline (Euclidean gcd + lcm, + 12 + 12 + 36 = 60). Two-line baseline: `let rec gcd a b = if b = 0 + then a else gcd b (a mod b)` + `let lcm a b = a * b / gcd a b`. + Tests `mod` arithmetic and the integer-division fix from + iteration 94 (without truncate-toward-zero, lcm 4 6 = 4*6 / 2 = + 12.0 not 12). 54 baseline programs total. - 2026-05-09 Phase 5.1 — zip_unzip.ml baseline (list zip/unzip round-trip, sum-product = 1000). zip walks both lists in lockstep truncating at the shorter; unzip uses tuple-pattern destructuring From 027678f31e69a6f8266763819e6ac66c735e9e52 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 11:52:19 +0000 Subject: [PATCH 148/298] ocaml: phase 5.1 flatten_tree.ml baseline (parametric ADT flatten, sum 1..7 = 28) Defines a parametric tree: type 'a tree = Leaf of 'a | Node of 'a tree list let rec flatten t = match t with | Leaf x -> [x] | Node ts -> List.concat (List.map flatten ts) Test tree has 3 levels of nesting: Node [Leaf 1; Node [Leaf 2; Leaf 3]; Node [Node [Leaf 4]; Leaf 5; Leaf 6]; Leaf 7] flattens to [1;2;3;4;5;6;7] -> sum = 28. Tests parametric ADT, mutual recursion via map+self, List.concat. 55 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/flatten_tree.ml | 17 +++++++++++++++++ plans/ocaml-on-sx.md | 7 +++++++ 3 files changed, 25 insertions(+) create mode 100644 lib/ocaml/baseline/flatten_tree.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 140edeea..037e7084 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -24,6 +24,7 @@ "grep_count.ml": 3, "hanoi.ml": 1023, "fizzbuzz.ml": 57, + "flatten_tree.ml": 28, "list_ops.ml": 30, "mat_mul.ml": 621, "json_pretty.ml": 24, diff --git a/lib/ocaml/baseline/flatten_tree.ml b/lib/ocaml/baseline/flatten_tree.ml new file mode 100644 index 00000000..8572dd4b --- /dev/null +++ b/lib/ocaml/baseline/flatten_tree.ml @@ -0,0 +1,17 @@ +type 'a tree = Leaf of 'a | Node of 'a tree list + +let rec flatten t = + match t with + | Leaf x -> [x] + | Node ts -> List.concat (List.map flatten ts) + +;; + +let t = Node [ + Leaf 1; + Node [Leaf 2; Leaf 3]; + Node [Node [Leaf 4]; Leaf 5; Leaf 6]; + Leaf 7 +] +in +List.fold_left (+) 0 (flatten t) diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 377f0165..e185bb34 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — flatten_tree.ml baseline (parametric ADT + flatten, sum 1..7 = 28). Defines `type 'a tree = Leaf of 'a | + Node of 'a tree list` then `flatten` recursively expands using + `List.concat (List.map flatten ts)`. Tree has 3 levels of + nesting; flattens to [1;2;3;4;5;6;7]. Tests parametric ADT, mutual + recursion via map+self, List.concat from runtime. 55 baseline + programs total. - 2026-05-09 Phase 5.1 — gcd_lcm.ml baseline (Euclidean gcd + lcm, 12 + 12 + 36 = 60). Two-line baseline: `let rec gcd a b = if b = 0 then a else gcd b (a mod b)` + `let lcm a b = a * b / gcd a b`. From acc8b01ddbab08656fcce3a0cb29e8ff319ebfc5 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 12:04:42 +0000 Subject: [PATCH 149/298] ocaml: phase 5.1 exception_user.ml baseline (user exception with payload, 4+5+7+10 = 26) Defines a user exception with int payload: exception Negative of int let safe_sqrt n = if n < 0 then raise (Negative n) else let try_sqrt n = try safe_sqrt n with | Negative x -> -x try_sqrt 16 -> 4 try_sqrt 25 -> 5 try_sqrt -7 -> 7 (handler returns -(-7) = 7) try_sqrt 100 -> 10 sum -> 26 Tests exception declaration with int payload, raise with carry, and try-with arm pattern-matching the constructor with payload binding. 56 baseline programs total. --- lib/ocaml/baseline/exception_user.ml | 16 ++++++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 7 +++++++ 3 files changed, 24 insertions(+) create mode 100644 lib/ocaml/baseline/exception_user.ml diff --git a/lib/ocaml/baseline/exception_user.ml b/lib/ocaml/baseline/exception_user.ml new file mode 100644 index 00000000..1b00455a --- /dev/null +++ b/lib/ocaml/baseline/exception_user.ml @@ -0,0 +1,16 @@ +exception Negative of int + +let safe_sqrt n = + if n < 0 then raise (Negative n) + else + let g = ref 1 in + while !g * !g < n do g := !g + 1 done; + !g + +let try_sqrt n = + try safe_sqrt n with + | Negative x -> -x + +;; + +try_sqrt 16 + try_sqrt 25 + try_sqrt (-7) + try_sqrt 100 diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 037e7084..bb5b75fe 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -15,6 +15,7 @@ "coin_change.ml": 6, "csv.ml": 10, "exception_handle.ml": 4, + "exception_user.ml": 26, "expr_eval.ml": 16, "expr_simp.ml": 22, "factorial.ml": 3628800, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index e185bb34..adcdbb15 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — exception_user.ml baseline (user-defined + exception with int payload, 4+5+7+10 = 26). Defines `exception + Negative of int`, `safe_sqrt` raises it on negative input, and + `try_sqrt` catches with `try ... with | Negative x -> -x`. Tests + exception declaration, raise with carry-payload, try-with arm + matching the constructor and binding the payload. 56 baseline + programs total. - 2026-05-09 Phase 5.1 — flatten_tree.ml baseline (parametric ADT flatten, sum 1..7 = 28). Defines `type 'a tree = Leaf of 'a | Node of 'a tree list` then `flatten` recursively expands using From 5c587c0f61ee4c158fd8ef823132967abfc95b24 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 12:14:32 +0000 Subject: [PATCH 150/298] ocaml: phase 5.1 anagram_check.ml baseline (char-frequency array, 2/4 anagrams) to_counts builds a 256-slot int array of character frequencies: let to_counts s = let counts = Array.make 256 0 in for i = 0 to String.length s - 1 do let c = Char.code s.[i] in counts.(c) <- counts.(c) + 1 done; counts same_counts compares two arrays element-by-element via for loop + bool ref. is_anagram composes them. Four pairs: listen ~ silent true hello !~ world false anagram ~ nagaram true abc !~ abcd false (length differs) sum 2 Exercises Array.make + arr.(i) + arr.(i) <- v + nested for loops + Char.code + s.[i]. 57 baseline programs total. --- lib/ocaml/baseline/anagram_check.ml | 23 +++++++++++++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 7 +++++++ 3 files changed, 31 insertions(+) create mode 100644 lib/ocaml/baseline/anagram_check.ml diff --git a/lib/ocaml/baseline/anagram_check.ml b/lib/ocaml/baseline/anagram_check.ml new file mode 100644 index 00000000..641cc0e6 --- /dev/null +++ b/lib/ocaml/baseline/anagram_check.ml @@ -0,0 +1,23 @@ +let to_counts s = + let counts = Array.make 256 0 in + for i = 0 to String.length s - 1 do + let c = Char.code s.[i] in + counts.(c) <- counts.(c) + 1 + done; + counts + +let same_counts a b = + let result = ref true in + for i = 0 to 255 do + if a.(i) <> b.(i) then result := false + done; + !result + +let is_anagram s t = same_counts (to_counts s) (to_counts t) + +;; + +(if is_anagram "listen" "silent" then 1 else 0) + +(if is_anagram "hello" "world" then 1 else 0) + +(if is_anagram "anagram" "nagaram" then 1 else 0) + +(if is_anagram "abc" "abcd" then 1 else 0) diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index bb5b75fe..e9ad3ee5 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -1,5 +1,6 @@ { "ackermann.ml": 125, + "anagram_check.ml": 2, "anagrams.ml": 3, "bag.ml": 3, "bigint_add.ml": 28, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index adcdbb15..3a86cdb2 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — anagram_check.ml baseline (char-frequency + array, 2/4 pairs are anagrams). to_counts builds a 256-slot int + array of character frequencies. same_counts compares two arrays + element-by-element. is_anagram = same_counts on both. listen ~ + silent ✓, hello ≠ world, anagram ~ nagaram ✓, abc ≠ abcd → 2. + Exercises Array.make + arr.(i) + arr.(i) <- v + nested for loops + + Char.code + s.[i]. 57 baseline programs total. - 2026-05-09 Phase 5.1 — exception_user.ml baseline (user-defined exception with int payload, 4+5+7+10 = 26). Defines `exception Negative of int`, `safe_sqrt` raises it on negative input, and From 097c7f45908a0c841cb312d649e706acd28fac43 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 12:24:48 +0000 Subject: [PATCH 151/298] ocaml: phase 5.1 bf_full.ml baseline (full Brainfuck with [] loops, +++[.-] = 6) Extends the iter-92 brainfuck.ml subset interpreter with bracket matching: '[' if mem[ptr] = 0, jump past matching ']' (forward depth-counting scan: '[' increments depth, ']' decrements) ']' if mem[ptr] <> 0, jump back to matching '[' (backward depth-counting scan) Test program '+++[.-]': +++ set cell 0 = 3 [ enter loop (cell != 0) . acc += cell - cell -= 1 ] loop while cell != 0 result: acc = 3 + 2 + 1 = 6 Tests deeply nested while loops, mutable pc / ptr / acc, multi-arm if/else if dispatch on chars + nested begin/end blocks for loop body conditionals. 58 baseline programs total. --- lib/ocaml/baseline/bf_full.ml | 42 ++++++++++++++++++++++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 7 ++++++ 3 files changed, 50 insertions(+) create mode 100644 lib/ocaml/baseline/bf_full.ml diff --git a/lib/ocaml/baseline/bf_full.ml b/lib/ocaml/baseline/bf_full.ml new file mode 100644 index 00000000..65b3c8a0 --- /dev/null +++ b/lib/ocaml/baseline/bf_full.ml @@ -0,0 +1,42 @@ +let interpret prog = + let mem = Array.make 256 0 in + let ptr = ref 0 in + let pc = ref 0 in + let n = String.length prog in + let acc = ref 0 in + while !pc < n do + let c = prog.[!pc] in + (if c = '>' then ptr := !ptr + 1 + else if c = '<' then ptr := !ptr - 1 + else if c = '+' then mem.(!ptr) <- mem.(!ptr) + 1 + else if c = '-' then mem.(!ptr) <- mem.(!ptr) - 1 + else if c = '.' then acc := !acc + mem.(!ptr) + else if c = '[' then begin + if mem.(!ptr) = 0 then begin + let depth = ref 1 in + while !depth > 0 do + pc := !pc + 1; + let c = prog.[!pc] in + if c = '[' then depth := !depth + 1 + else if c = ']' then depth := !depth - 1 + done + end + end + else if c = ']' then begin + if mem.(!ptr) <> 0 then begin + let depth = ref 1 in + while !depth > 0 do + pc := !pc - 1; + let c = prog.[!pc] in + if c = ']' then depth := !depth + 1 + else if c = '[' then depth := !depth - 1 + done + end + end); + pc := !pc + 1 + done; + !acc + +;; + +interpret "+++[.-]" diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index e9ad3ee5..ccdfe1bb 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -3,6 +3,7 @@ "anagram_check.ml": 2, "anagrams.ml": 3, "bag.ml": 3, + "bf_full.ml": 6, "bigint_add.ml": 28, "bits.ml": 21, "balance.ml": 3, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 3a86cdb2..b6486d70 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — bf_full.ml baseline (Brainfuck interpreter + with `[`/`]` loops, `+++[.-]` → 3+2+1 = 6). Extends the iter-92 + brainfuck.ml subset with bracket matching: `[` jumps past matching + `]` if cell is zero (forward depth-counting scan); `]` jumps back + to matching `[` if cell is non-zero (backward depth-counting scan). + Tests deeply nested while loops, mutable pc + ptr + acc, multi-arm + if-else if dispatch on chars. 58 baseline programs total. - 2026-05-09 Phase 5.1 — anagram_check.ml baseline (char-frequency array, 2/4 pairs are anagrams). to_counts builds a 256-slot int array of character frequencies. same_counts compares two arrays From f5122a9a5df418c7b7c1057742002fba67518d13 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 12:34:36 +0000 Subject: [PATCH 152/298] ocaml: phase 5.1 atm.ml baseline (mutable record + exception + try/with, balance = 120) Models a bank account using a mutable record + a user exception: type account = { mutable balance : int } exception Insufficient let withdraw acct amt = if amt > acct.balance then raise Insufficient else acct.balance <- acct.balance - amt Sequence: start 100 deposit 50 150 withdraw 30 120 withdraw 200 raises Insufficient handler returns acct.balance (= 120, transaction rolled back) Combines mutable record fields, user exception declaration, try-with-bare-pattern, and verifies that a raise in the middle of a sequence doesn't leave a partial mutation. 59 baseline programs total. --- lib/ocaml/baseline/atm.ml | 17 +++++++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 9 +++++++++ 3 files changed, 27 insertions(+) create mode 100644 lib/ocaml/baseline/atm.ml diff --git a/lib/ocaml/baseline/atm.ml b/lib/ocaml/baseline/atm.ml new file mode 100644 index 00000000..cf8b66e0 --- /dev/null +++ b/lib/ocaml/baseline/atm.ml @@ -0,0 +1,17 @@ +type account = { mutable balance : int } + +exception Insufficient + +let withdraw acct amt = + if amt > acct.balance then raise Insufficient + else acct.balance <- acct.balance - amt + +let deposit acct amt = acct.balance <- acct.balance + amt + +;; + +let a = { balance = 100 } in +deposit a 50; +withdraw a 30; +try (withdraw a 200; -1) +with Insufficient -> a.balance diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index ccdfe1bb..1db31844 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -2,6 +2,7 @@ "ackermann.ml": 125, "anagram_check.ml": 2, "anagrams.ml": 3, + "atm.ml": 120, "bag.ml": 3, "bf_full.ml": 6, "bigint_add.ml": 28, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index b6486d70..accd5f67 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,15 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — atm.ml baseline (mutable record + custom + exception + try/with, balance 120 after rollback). Models a bank + account: deposit/withdraw mutate the balance field; an over-draw + raises `Insufficient` which the caller catches and falls back to + the unchanged balance. Starting at 100, +50 = 150, -30 = 120, + attempted -200 throws and the handler returns 120. Combines + mutable record fields, user exception declaration, and + try-with-bare-pattern in a realistic micro-pattern. 59 baseline + programs total. - 2026-05-09 Phase 5.1 — bf_full.ml baseline (Brainfuck interpreter with `[`/`]` loops, `+++[.-]` → 3+2+1 = 6). Extends the iter-92 brainfuck.ml subset with bracket matching: `[` jumps past matching From af38d98583277f7234b6cd27fa88202fb17026a6 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 12:44:02 +0000 Subject: [PATCH 153/298] ocaml: phase 5.1 prime_factors.ml baseline (trial-division, 360 factor sum = 17) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three refs threading through a while loop: m remaining quotient d current divisor result accumulator (built in reverse, List.rev at end) while !m > 1 do if !m mod !d = 0 then begin result := !d :: !result; m := !m / !d end else d := !d + 1 done 360 = 2^3 * 3^2 * 5 factors to [2;2;2;3;3;5], sum 17. 60 baseline programs total — milestone. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/prime_factors.ml | 16 ++++++++++++++++ plans/ocaml-on-sx.md | 6 ++++++ 3 files changed, 23 insertions(+) create mode 100644 lib/ocaml/baseline/prime_factors.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 1db31844..12d1d880 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -46,6 +46,7 @@ "pi_leibniz.ml": 314, "pretty_table.ml": 64, "poly_stack.ml": 5, + "prime_factors.ml": 17, "queens.ml": 2, "quicksort.ml": 44, "roman.ml": 44, diff --git a/lib/ocaml/baseline/prime_factors.ml b/lib/ocaml/baseline/prime_factors.ml new file mode 100644 index 00000000..382f0e8b --- /dev/null +++ b/lib/ocaml/baseline/prime_factors.ml @@ -0,0 +1,16 @@ +let factor n = + let result = ref [] in + let m = ref n in + let d = ref 2 in + while !m > 1 do + if !m mod !d = 0 then begin + result := !d :: !result; + m := !m / !d + end else + d := !d + 1 + done; + List.rev !result + +;; + +List.fold_left (+) 0 (factor 360) diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index accd5f67..1cd88fe1 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,12 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — prime_factors.ml baseline (trial-division + factorisation, sum of factors of 360 = 17). Three refs threading + through a while loop: m holds the remaining quotient, d the + current divisor, result accumulates factors. When `m mod d = 0`, + push d and divide; otherwise increment d. 360 = 2^3 * 3^2 * 5 + factors to [2;2;2;3;3;5], sum 17. 60 baseline programs total. - 2026-05-09 Phase 5.1 — atm.ml baseline (mutable record + custom exception + try/with, balance 120 after rollback). Models a bank account: deposit/withdraw mutate the balance field; an over-draw From 05487b497d2893572fb2195209a3a8273d5de4bb Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 12:52:55 +0000 Subject: [PATCH 154/298] ocaml: phase 5.1 base_n.ml baseline (int to base-N string, length sum = 17) 36-character digit alphabet '0..9A..Z' supports any base 2..36. Loop divides the magnitude by base and prepends the digit: while !m > 0 do acc := String.make 1 digits.[!m mod base] ^ !acc; m := !m / base done Special-cases n = 0 -> '0' and prepends '-' for negatives. Test cases (length, since the strings differ in alphabet): 255 hex 'FF' 2 1024 binary '10000000000' 11 100 dec '100' 3 0 any base '0' 1 sum 17 Combines digits.[i] (string indexing) + String.make 1 ch + String concatenation in a loop. 61 baseline programs total. --- lib/ocaml/baseline/base_n.ml | 19 +++++++++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 10 ++++++++++ 3 files changed, 30 insertions(+) create mode 100644 lib/ocaml/baseline/base_n.ml diff --git a/lib/ocaml/baseline/base_n.ml b/lib/ocaml/baseline/base_n.ml new file mode 100644 index 00000000..f3edb209 --- /dev/null +++ b/lib/ocaml/baseline/base_n.ml @@ -0,0 +1,19 @@ +let to_base_n n base = + let digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" in + if n = 0 then "0" + else begin + let m = ref (abs n) in + let acc = ref "" in + while !m > 0 do + acc := String.make 1 digits.[!m mod base] ^ !acc; + m := !m / base + done; + if n < 0 then "-" ^ !acc else !acc + end + +;; + +String.length (to_base_n 255 16) + +String.length (to_base_n 1024 2) + +String.length (to_base_n 100 10) + +String.length (to_base_n 0 16) diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 12d1d880..3678a818 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -8,6 +8,7 @@ "bigint_add.ml": 28, "bits.ml": 21, "balance.ml": 3, + "base_n.ml": 17, "bfs.ml": 6, "btree.ml": 39, "brainfuck.ml": 75, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 1cd88fe1..bad1dfe8 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,16 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — base_n.ml baseline (int -> base-N string, + length sum 2+11+3+1 = 17). 36-character digit alphabet supports up + to base 36. Loop divides quotient by base, prepends digit. Tests: + 255 hex "FF" 2 + 1024 binary "10000000000" 11 + 100 dec "100" 3 + 0 any base "0" 1 + Sum 17. Combines digits.[!m mod base] (string indexing) + + String.make 1 ch (1-char string from 1-char string in our model) + + String concatenation in a loop. 61 baseline programs total. - 2026-05-09 Phase 5.1 — prime_factors.ml baseline (trial-division factorisation, sum of factors of 360 = 17). Three refs threading through a while loop: m holds the remaining quotient, d the From 50981a2a9bd600bbe90650cec28d64bad2234382 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 13:02:17 +0000 Subject: [PATCH 155/298] ocaml: phase 5.1 bisect.ml baseline (root-finding, sqrt(2)*100 = 141) Bisection method searching for f(x) = 0 in [lo, hi] over 50 iterations: let bisect f lo hi = let lo = ref lo and hi = ref hi in for _ = 1 to 50 do let mid = (!lo +. !hi) /. 2.0 in if f mid = 0.0 || f !lo *. f mid < 0.0 then hi := mid else lo := mid done; !lo Solving x^2 - 2 = 0 in [1, 2] via 'bisect (fun x -> x *. x -. 2.0) 1.0 2.0' converges to ~1.41421356... -> int_of_float (r *. 100) = 141. Tests: - higher-order function passing - multi-let 'let lo = ref ... and hi = ref ...' - float arithmetic - int_of_float truncate-toward-zero (iter 117) 62 baseline programs total. --- lib/ocaml/baseline/bisect.ml | 13 +++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 7 +++++++ 3 files changed, 21 insertions(+) create mode 100644 lib/ocaml/baseline/bisect.ml diff --git a/lib/ocaml/baseline/bisect.ml b/lib/ocaml/baseline/bisect.ml new file mode 100644 index 00000000..e7e5b9e6 --- /dev/null +++ b/lib/ocaml/baseline/bisect.ml @@ -0,0 +1,13 @@ +let bisect f lo hi = + let lo = ref lo and hi = ref hi in + for _ = 1 to 50 do + let mid = (!lo +. !hi) /. 2.0 in + if f mid = 0.0 || f !lo *. f mid < 0.0 then hi := mid + else lo := mid + done; + !lo + +;; + +let r = bisect (fun x -> x *. x -. 2.0) 1.0 2.0 in +int_of_float (r *. 100.0) diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 3678a818..9788d392 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -5,6 +5,7 @@ "atm.ml": 120, "bag.ml": 3, "bf_full.ml": 6, + "bisect.ml": 141, "bigint_add.ml": 28, "bits.ml": 21, "balance.ml": 3, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index bad1dfe8..acf3e929 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — bisect.ml baseline (root-finding via + bisection, sqrt(2) * 100 = 141). 50 iterations of bisection + searching for x^2 - 2 = 0 in [1, 2]. Tests higher-order function + passing (the function-to-zero is `(fun x -> x *. x -. 2.0)`), + multi-let `let lo = ref ... and hi = ref ...`, float arithmetic, + and the int_of_float truncate-toward-zero from iteration 117. 62 + baseline programs total. - 2026-05-09 Phase 5.1 — base_n.ml baseline (int -> base-N string, length sum 2+11+3+1 = 17). 36-character digit alphabet supports up to base 36. Loop divides quotient by base, prepends digit. Tests: From 0eef5bc8e69ba8dda58cc4bd32726e3bcd1d6c98 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 13:15:05 +0000 Subject: [PATCH 156/298] ocaml: phase 5.1 twosum.ml baseline (LeetCode #1 one-pass hashmap, index sum = 5) Walks list with List.iteri, checking if target - x is already in the hashtable; if yes, the earlier index plus current is the answer; otherwise record the current pair. twosum [2;7;11;15] 9 = (0, 1) 2+7 twosum [3;2;4] 6 = (1, 2) 2+4 twosum [3;3] 6 = (0, 1) 3+3 Sum of i+j over each pair: 1 + 3 + 1 = 5. Tests Hashtbl.find_opt + add (the iter-99 cleanup), List.iteri, and tuple destructuring on let-binding (iter 98 'let (i, j) = twosum ... in'). 63 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/twosum.ml | 17 +++++++++++++++++ plans/ocaml-on-sx.md | 11 +++++++++++ 3 files changed, 29 insertions(+) create mode 100644 lib/ocaml/baseline/twosum.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 9788d392..c6706fe0 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -60,6 +60,7 @@ "zip_unzip.ml": 1000, "sieve.ml": 15, "sum_squares.ml": 385, + "twosum.ml": 5, "unique_set.ml": 9, "validate.ml": 417, "word_count.ml": 3 diff --git a/lib/ocaml/baseline/twosum.ml b/lib/ocaml/baseline/twosum.ml new file mode 100644 index 00000000..a6011476 --- /dev/null +++ b/lib/ocaml/baseline/twosum.ml @@ -0,0 +1,17 @@ +let twosum xs target = + let h = Hashtbl.create 8 in + let result = ref (-1, -1) in + List.iteri (fun i x -> + let need = target - x in + match Hashtbl.find_opt h need with + | Some j -> result := (j, i) + | None -> Hashtbl.add h x i + ) xs; + !result + +;; + +let (i1, j1) = twosum [2;7;11;15] 9 in +let (i2, j2) = twosum [3;2;4] 6 in +let (i3, j3) = twosum [3;3] 6 in +i1 + j1 + i2 + j2 + i3 + j3 diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index acf3e929..9eee200c 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,17 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — twosum.ml baseline (LeetCode #1, hashtable + one-pass, index-sum 1+3+1 = 5). Walks list with List.iteri, + checking if `target - x` is already in the hashtable; if yes, the + earlier index plus current is the answer; otherwise record the + current pair. Three test cases: + [2;7;11;15] target 9 → (0, 1) + [3;2;4] target 6 → (1, 2) + [3;3] target 6 → (0, 1) + Sum of i+j over each pair: 1+3+1 = 5. Tests Hashtbl.find_opt / + add + List.iteri + tuple destructuring on let-binding (iter 98). + 63 baseline programs total. - 2026-05-09 Phase 5.1 — bisect.ml baseline (root-finding via bisection, sqrt(2) * 100 = 141). 50 iterations of bisection searching for x^2 - 2 = 0 in [1, 2]. Tests higher-order function From 7de014cd7565c4152e7c7d61d66ea5868f84dcad Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 13:30:46 +0000 Subject: [PATCH 157/298] ocaml: phase 5.1 hailstone.ml baseline (Collatz length from 27 = 111 steps) Iterative Collatz / hailstone sequence: let collatz_length n = let m = ref n in let count = ref 0 in while !m > 1 do if !m mod 2 = 0 then m := !m / 2 else m := 3 * !m + 1; count := !count + 1 done; !count 27 is the famous 'long-running' Collatz starter. Reaches a peak of 9232 mid-sequence and takes 111 steps to bottom out at 1. 64 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/hailstone.ml | 13 +++++++++++++ plans/ocaml-on-sx.md | 6 ++++++ 3 files changed, 20 insertions(+) create mode 100644 lib/ocaml/baseline/hailstone.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index c6706fe0..65062630 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -28,6 +28,7 @@ "frequency.ml": 5, "gcd_lcm.ml": 60, "grep_count.ml": 3, + "hailstone.ml": 111, "hanoi.ml": 1023, "fizzbuzz.ml": 57, "flatten_tree.ml": 28, diff --git a/lib/ocaml/baseline/hailstone.ml b/lib/ocaml/baseline/hailstone.ml new file mode 100644 index 00000000..4f9e3f9b --- /dev/null +++ b/lib/ocaml/baseline/hailstone.ml @@ -0,0 +1,13 @@ +let collatz_length n = + let m = ref n in + let count = ref 0 in + while !m > 1 do + if !m mod 2 = 0 then m := !m / 2 + else m := 3 * !m + 1; + count := !count + 1 + done; + !count + +;; + +collatz_length 27 diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 9eee200c..face6a09 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,12 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — hailstone.ml baseline (Collatz length, + starting from 27 → 111 steps to reach 1). Iterative while-loop + applies `n / 2` if even, `3n + 1` if odd, counting steps. 27 is + the famous "long-running" Collatz starter that produces 111 + iterations and a peak value of 9232 mid-sequence. 64 baseline + programs total. - 2026-05-09 Phase 5.1 — twosum.ml baseline (LeetCode #1, hashtable one-pass, index-sum 1+3+1 = 5). Walks list with List.iteri, checking if `target - x` is already in the hashtable; if yes, the From b94a47a9a9df3064e553d6c99756edb1316130b5 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 13:46:40 +0000 Subject: [PATCH 158/298] ocaml: phase 5.1 subset_sum.ml baseline (count subsets of [1..8] summing to 10 = 8) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pure recursion — at each element, take it or don't: let rec count_subsets xs target = match xs with | [] -> if target = 0 then 1 else 0 | x :: rest -> count_subsets rest target + count_subsets rest (target - x) For [1;2;3;4;5;6;7;8] target 10, the recursion tree has 2^8 = 256 leaves. Returns 8 — number of subsets summing to 10: 1+2+3+4, 1+2+7, 1+3+6, 1+4+5, 2+3+5, 2+8, 3+7, 4+6 = 8 Tests doubly-recursive list traversal pattern with single-arg list + accumulator-via-target. 65 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/subset_sum.ml | 10 ++++++++++ plans/ocaml-on-sx.md | 7 +++++++ 3 files changed, 18 insertions(+) create mode 100644 lib/ocaml/baseline/subset_sum.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 65062630..95a8bbe7 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -57,6 +57,7 @@ "run_length.ml": 11, "safe_div.ml": 20, "shuffle.ml": 55, + "subset_sum.ml": 8, "word_freq.ml": 8, "zip_unzip.ml": 1000, "sieve.ml": 15, diff --git a/lib/ocaml/baseline/subset_sum.ml b/lib/ocaml/baseline/subset_sum.ml new file mode 100644 index 00000000..5f65db14 --- /dev/null +++ b/lib/ocaml/baseline/subset_sum.ml @@ -0,0 +1,10 @@ +let rec count_subsets xs target = + match xs with + | [] -> if target = 0 then 1 else 0 + | x :: rest -> + count_subsets rest target + + count_subsets rest (target - x) + +;; + +count_subsets [1; 2; 3; 4; 5; 6; 7; 8] 10 diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index face6a09..880975f1 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — subset_sum.ml baseline (count subsets of + [1..8] summing to 10 = 8). Pure recursion: at each element, take + it or don't. Base case: target=0 → 1, target≠0 → 0. 2^8 = 256 + recursive calls. The 8 subsets are: 1+2+3+4, 1+2+7, 1+3+6, 1+4+5, + 1+9 (no), 2+3+5, 2+8, 3+7, 4+6, 1+2+3+4. Verified count = 8. + Tests doubly-recursive list traversal pattern. 65 baseline programs + total. - 2026-05-09 Phase 5.1 — hailstone.ml baseline (Collatz length, starting from 27 → 111 steps to reach 1). Iterative while-loop applies `n / 2` if even, `3n + 1` if odd, counting steps. 27 is From 1c40fec8faa2efab0dea5ec0c40d737de2505975 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 14:00:49 +0000 Subject: [PATCH 159/298] ocaml: phase 5.1 tic_tac_toe.ml baseline (3x3 winner check, X wins top row = 1) Board as 9-element flat int array, 0=empty, 1=X, 2=O. Three predicate functions: check_row b r check_col b c check_diag b each return the winning player's mark or 0. Main 'winner' loops i = 0..2 calling row(i)/col(i) then check_diag, threading via a result ref. Test board: X X X . O . . . O X wins on row 0 -> winner returns 1. Tests Array.of_list with row-major 'b.(r * 3 + c)' indexing, multi-fn collaboration, and structural equality on int values. 66 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/tic_tac_toe.ml | 34 +++++++++++++++++++++++++++++++ plans/ocaml-on-sx.md | 7 +++++++ 3 files changed, 42 insertions(+) create mode 100644 lib/ocaml/baseline/tic_tac_toe.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 95a8bbe7..62d779e8 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -58,6 +58,7 @@ "safe_div.ml": 20, "shuffle.ml": 55, "subset_sum.ml": 8, + "tic_tac_toe.ml": 1, "word_freq.ml": 8, "zip_unzip.ml": 1000, "sieve.ml": 15, diff --git a/lib/ocaml/baseline/tic_tac_toe.ml b/lib/ocaml/baseline/tic_tac_toe.ml new file mode 100644 index 00000000..7eadef1f --- /dev/null +++ b/lib/ocaml/baseline/tic_tac_toe.ml @@ -0,0 +1,34 @@ +let check_row b r = + let a = b.(r * 3) in + if a <> 0 && a = b.(r * 3 + 1) && a = b.(r * 3 + 2) then a + else 0 + +let check_col b c = + let a = b.(c) in + if a <> 0 && a = b.(c + 3) && a = b.(c + 6) then a + else 0 + +let check_diag b = + let a = b.(0) in + if a <> 0 && a = b.(4) && a = b.(8) then a + else + let b' = b.(2) in + if b' <> 0 && b' = b.(4) && b' = b.(6) then b' + else 0 + +let winner b = + let r = ref 0 in + for i = 0 to 2 do + let cr = check_row b i in + if cr <> 0 then r := cr; + let cc = check_col b i in + if cc <> 0 then r := cc + done; + let cd = check_diag b in + if cd <> 0 then r := cd; + !r + +;; + +let b = Array.of_list [1;1;1; 0;2;0; 0;0;2] in +winner b diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 880975f1..cd5033fb 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — tic_tac_toe.ml baseline (3x3 winner check, + X wins top row → 1). Board encoded as 9-element flat int array + with 0=empty, 1=X, 2=O. Three predicate functions check row, + column, and either diagonal; main `winner` loops over the 8 + winning lines. Tests Array.of_list with row-major indexing, + multi-fn collaboration, and structural equality on int values. + 66 baseline programs total. - 2026-05-09 Phase 5.1 — subset_sum.ml baseline (count subsets of [1..8] summing to 10 = 8). Pure recursion: at each element, take it or don't. Base case: target=0 → 1, target≠0 → 0. 2^8 = 256 From 0f2eb45f5ce6fee24f9b2154630b26083e43968c Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 14:21:24 +0000 Subject: [PATCH 160/298] ocaml: phase 5.1 prefix_sum.ml baseline (precomputed sums + range queries, 14+25+27 = 66) Two utility functions: prefix_sums xs builds Array of len n+1 such that arr.(i) = sum of xs[0..i-1] range_sum p lo hi = p.(hi+1) - p.(lo) For [3;1;4;1;5;9;2;6;5;3]: range_sum 0 4 = 14 (3+1+4+1+5) range_sum 5 9 = 25 (9+2+6+5+3) range_sum 2 7 = 27 (4+1+5+9+2+6) sum = 66 Tests List.iter mutating Array indexed by a ref counter, plus the classic prefix-sum technique for O(1) range queries. 67 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/prefix_sum.ml | 16 ++++++++++++++++ plans/ocaml-on-sx.md | 7 +++++++ 3 files changed, 24 insertions(+) create mode 100644 lib/ocaml/baseline/prefix_sum.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 62d779e8..4d72e60d 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -47,6 +47,7 @@ "palindrome.ml": 4, "pascal.ml": 252, "pi_leibniz.ml": 314, + "prefix_sum.ml": 66, "pretty_table.ml": 64, "poly_stack.ml": 5, "prime_factors.ml": 17, diff --git a/lib/ocaml/baseline/prefix_sum.ml b/lib/ocaml/baseline/prefix_sum.ml new file mode 100644 index 00000000..eb719e7d --- /dev/null +++ b/lib/ocaml/baseline/prefix_sum.ml @@ -0,0 +1,16 @@ +let prefix_sums xs = + let n = List.length xs in + let arr = Array.make (n + 1) 0 in + let i = ref 0 in + List.iter (fun x -> + arr.(!i + 1) <- arr.(!i) + x; + i := !i + 1 + ) xs; + arr + +let range_sum prefixes lo hi = prefixes.(hi + 1) - prefixes.(lo) + +;; + +let p = prefix_sums [3; 1; 4; 1; 5; 9; 2; 6; 5; 3] in +range_sum p 0 4 + range_sum p 5 9 + range_sum p 2 7 diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index cd5033fb..153ed59f 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — prefix_sum.ml baseline (precomputed prefix + sums for O(1) range queries, sum of three queries = 66). + prefix_sums xs returns an Array of len n+1 such that + `arr.(i) = sum of xs[0..i-1]`. range_sum computes any contiguous + subarray sum in O(1) via subtraction. Tests List.iter mutating + Array indexed by ref counter, plus the classic prefix-sum + technique. 67 baseline programs total. - 2026-05-09 Phase 5.1 — tic_tac_toe.ml baseline (3x3 winner check, X wins top row → 1). Board encoded as 9-element flat int array with 0=empty, 1=X, 2=O. Three predicate functions check row, From a2f3c533b817b5658c25df6f76fc920319adfc63 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 14:30:55 +0000 Subject: [PATCH 161/298] ocaml: phase 5.1 zigzag.ml baseline (interleave two lists, sum 1..10 = 55) One-liner that swaps the lists on every recursive call: let rec zigzag xs ys = match xs with | [] -> ys | x :: xs' -> x :: zigzag ys xs' This works because each call emits the head of xs and recurses with ys as the new xs and the rest of xs as the new ys. zigzag [1;3;5;7;9] [2;4;6;8;10] = [1;2;3;4;5;6;7;8;9;10] sum = 55 Tests recursive list cons + arg-swap idiom that is concise but non-obvious to readers expecting symmetric-handling. 68 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/zigzag.ml | 8 ++++++++ plans/ocaml-on-sx.md | 6 ++++++ 3 files changed, 15 insertions(+) create mode 100644 lib/ocaml/baseline/zigzag.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 4d72e60d..208228be 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -61,6 +61,7 @@ "subset_sum.ml": 8, "tic_tac_toe.ml": 1, "word_freq.ml": 8, + "zigzag.ml": 55, "zip_unzip.ml": 1000, "sieve.ml": 15, "sum_squares.ml": 385, diff --git a/lib/ocaml/baseline/zigzag.ml b/lib/ocaml/baseline/zigzag.ml new file mode 100644 index 00000000..710644f0 --- /dev/null +++ b/lib/ocaml/baseline/zigzag.ml @@ -0,0 +1,8 @@ +let rec zigzag xs ys = + match xs with + | [] -> ys + | x :: xs' -> x :: zigzag ys xs' + +;; + +List.fold_left (+) 0 (zigzag [1;3;5;7;9] [2;4;6;8;10]) diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 153ed59f..09bcf0a2 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,12 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — zigzag.ml baseline (interleave two lists, + sum 1..10 = 55). One-liner that swaps the lists on every recursive + call: `match xs with [] -> ys | x :: xs' -> x :: zigzag ys xs'`. + zigzag [1;3;5;7;9] [2;4;6;8;10] = [1;2;3;4;5;6;7;8;9;10] sum 55. + Tests recursive list cons + arg-swap idiom. 68 baseline programs + total. - 2026-05-09 Phase 5.1 — prefix_sum.ml baseline (precomputed prefix sums for O(1) range queries, sum of three queries = 66). prefix_sums xs returns an Array of len n+1 such that From a98d683e60d255aea1772e6f6d2c129b51b6b0f4 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 14:40:22 +0000 Subject: [PATCH 162/298] ocaml: phase 5.1 group_consec.ml baseline (group consecutive equals, 5*10+3 = 53) Inner 'collect cur acc tail' walks the tail while head matches 'cur', accumulating into 'acc'. Returns (rev acc, remaining) on first mismatch. Outer 'group' recurses on the remaining list. group [1;1;2;2;2;3;1;1;4] = [[1;1]; [2;2;2]; [3]; [1;1]; [4]] List.length groups = 5 List.length (gs.(1)) = 3 5 * 10 + 3 = 53 Tests nested recursion (inner aux + outer recursion), tuple destructuring 'let (g, tail) = ...' inside the outer match arm, and List.nth. 69 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/group_consec.ml | 18 ++++++++++++++++++ plans/ocaml-on-sx.md | 8 ++++++++ 3 files changed, 27 insertions(+) create mode 100644 lib/ocaml/baseline/group_consec.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 208228be..019643c9 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -28,6 +28,7 @@ "frequency.ml": 5, "gcd_lcm.ml": 60, "grep_count.ml": 3, + "group_consec.ml": 53, "hailstone.ml": 111, "hanoi.ml": 1023, "fizzbuzz.ml": 57, diff --git a/lib/ocaml/baseline/group_consec.ml b/lib/ocaml/baseline/group_consec.ml new file mode 100644 index 00000000..690f0271 --- /dev/null +++ b/lib/ocaml/baseline/group_consec.ml @@ -0,0 +1,18 @@ +let rec group xs = + match xs with + | [] -> [] + | x :: rest -> + let rec collect cur acc tail = + match tail with + | [] -> (List.rev acc, []) + | y :: ys -> + if y = cur then collect cur (y :: acc) ys + else (List.rev acc, y :: ys) + in + let (g, tail) = collect x [x] rest in + g :: group tail + +;; + +let gs = group [1;1;2;2;2;3;1;1;4] in +List.length gs * 10 + List.length (List.nth gs 1) diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 09bcf0a2..c36c914a 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,14 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — group_consec.ml baseline (group consecutive + equals into sublists, 5*10 + 3 = 53). Inner `collect cur acc + tail` walks while head matches `cur`, accumulates into `acc`, + returns `(rev acc, remaining)` on first mismatch. Outer `group` + recurses on the remaining list. [1;1;2;2;2;3;1;1;4] → + [[1;1];[2;2;2];[3];[1;1];[4]] (5 groups, second group has 3 + elements). Sum = 53. Tests nested recursion + tuple destructuring + in let-binding. 69 baseline programs total. - 2026-05-09 Phase 5.1 — zigzag.ml baseline (interleave two lists, sum 1..10 = 55). One-liner that swaps the lists on every recursive call: `match xs with [] -> ys | x :: xs' -> x :: zigzag ys xs'`. From 7f8bf5f45545ff44cc8489b2ac3288b52d64d9d2 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 14:52:13 +0000 Subject: [PATCH 163/298] ocaml: phase 5.1 mortgage.ml baseline (monthly payment, 200k @ 5% / 30y = $1073) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Standard amortising-mortgage formula: payment = P * r * (1 + r)^n / ((1 + r)^n - 1) where r = annual_rate / 12, n = years * 12. let payment principal annual_rate years = let r = annual_rate /. 12.0 in let n = years * 12 in let pow_r = ref 1.0 in for _ = 1 to n do pow_r := !pow_r *. (1.0 +. r) done; principal *. r *. !pow_r /. (!pow_r -. 1.0) For 200,000 at 5% over 30 years: monthly payment ~= $1073.64, int_of_float -> 1073. Manual (1+r)^n via for-loop instead of Float.pow keeps the program portable to any environment where pow is restricted. Tests float arithmetic precedence, for-loop accumulation in a float ref, int_of_float on the result. 70 baseline programs total — milestone. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/mortgage.ml | 10 ++++++++++ plans/ocaml-on-sx.md | 7 +++++++ 3 files changed, 18 insertions(+) create mode 100644 lib/ocaml/baseline/mortgage.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 019643c9..4b719c8a 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -40,6 +40,7 @@ "lambda_calc.ml": 7, "levenshtein.ml": 11, "memo_fib.ml": 75025, + "mortgage.ml": 1073, "merge_sort.ml": 44, "module_use.ml": 3, "newton_sqrt.ml": 1414, diff --git a/lib/ocaml/baseline/mortgage.ml b/lib/ocaml/baseline/mortgage.ml new file mode 100644 index 00000000..5119c516 --- /dev/null +++ b/lib/ocaml/baseline/mortgage.ml @@ -0,0 +1,10 @@ +let payment principal annual_rate years = + let r = annual_rate /. 12.0 in + let n = years * 12 in + let pow_r = ref 1.0 in + for _ = 1 to n do pow_r := !pow_r *. (1.0 +. r) done; + principal *. r *. !pow_r /. (!pow_r -. 1.0) + +;; + +int_of_float (payment 200000.0 0.05 30) diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index c36c914a..f47ef114 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — mortgage.ml baseline (monthly mortgage + payment formula, 200k @ 5% / 30y → $1073). Manual `(1+r)^n` + computed via for-loop because `Float.pow` may not be available + for arbitrary args here. Then plugs into `principal * r * + (1+r)^n / ((1+r)^n - 1)`. Tests float arithmetic precedence, + for-loop accumulation in a float ref, int_of_float on the result. + 70 baseline programs total — milestone. - 2026-05-09 Phase 5.1 — group_consec.ml baseline (group consecutive equals into sublists, 5*10 + 3 = 53). Inner `collect cur acc tail` walks while head matches `cur`, accumulates into `acc`, From 667dfcfd7c024321d717e9f93ed1ab1c8cbe67eb Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 15:02:13 +0000 Subject: [PATCH 164/298] ocaml: phase 5.1 hist.ml baseline (Hashtbl int histogram, total * max = 75) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three small functions: hist xs build a Hashtbl of count-by-value max_value h Hashtbl.fold to find the max bin total h Hashtbl.fold to sum all bins For the 15-element list [1;2;3;1;4;5;1;2;6;7;1;8;9;1;0]: total = 15 max_value = 5 (the number 1 appears 5 times) product = 75 Companion to bag.ml (string keys) and frequency.ml (char keys) — same Hashtbl.fold + Hashtbl.find_opt pattern, exercised on int keys this time. 71 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/hist.ml | 21 +++++++++++++++++++++ plans/ocaml-on-sx.md | 8 ++++++++ 3 files changed, 30 insertions(+) create mode 100644 lib/ocaml/baseline/hist.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 4b719c8a..c158e019 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -31,6 +31,7 @@ "group_consec.ml": 53, "hailstone.ml": 111, "hanoi.ml": 1023, + "hist.ml": 75, "fizzbuzz.ml": 57, "flatten_tree.ml": 28, "list_ops.ml": 30, diff --git a/lib/ocaml/baseline/hist.ml b/lib/ocaml/baseline/hist.ml new file mode 100644 index 00000000..fa826bd6 --- /dev/null +++ b/lib/ocaml/baseline/hist.ml @@ -0,0 +1,21 @@ +let hist xs = + let h = Hashtbl.create 8 in + List.iter (fun x -> + let n = match Hashtbl.find_opt h x with + | Some n -> n + 1 + | None -> 1 + in + Hashtbl.replace h x n + ) xs; + h + +let max_value h = + Hashtbl.fold (fun _ v acc -> if v > acc then v else acc) h 0 + +let total h = + Hashtbl.fold (fun _ v acc -> acc + v) h 0 + +;; + +let h = hist [1;2;3;1;4;5;1;2;6;7;1;8;9;1;0] in +total h * max_value h diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index f47ef114..6d6a3f86 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,14 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — hist.ml baseline (Hashtbl-based int + histogram, total * max = 75). Three small functions: hist builds + the count table, max_value finds the maximum bin, total sums all + bins. For a 15-element list with a top frequency of 5 (the + number 1), product = 75. Companions to bag.ml (string keys) and + frequency.ml (char keys) — same Hashtbl.fold + Hashtbl.find_opt + pattern, exercised on int keys this time. 71 baseline programs + total. - 2026-05-09 Phase 5.1 — mortgage.ml baseline (monthly mortgage payment formula, 200k @ 5% / 30y → $1073). Manual `(1+r)^n` computed via for-loop because `Float.pow` may not be available From 30b237a8914d72a72f384499579c2cea94ac045b Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 15:11:46 +0000 Subject: [PATCH 165/298] ocaml: phase 5.1 mod_inverse.ml baseline (extended Euclidean, inverse sum = 27) Extended Euclidean returns a triple (gcd, x, y) such that a*x + b*y = gcd: let rec ext_gcd a b = if b = 0 then (a, 1, 0) else let (g, x1, y1) = ext_gcd b (a mod b) in (g, y1, x1 - (a / b) * y1) let mod_inverse a m = let (_, x, _) = ext_gcd a m in ((x mod m) + m) mod m Three invariants checked: inv(3, 11) = 4 (3*4 = 12 = 1 mod 11) inv(5, 26) = 21 (5*21 = 105 = 1 mod 26) inv(7, 13) = 2 (7*2 = 14 = 1 mod 13) sum = 27 Tests recursive triple-tuple return, tuple-pattern destructuring on let-binding (with wildcard for unused fields), and nested let-binding inside the recursive call site. 72 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/mod_inverse.ml | 13 +++++++++++++ plans/ocaml-on-sx.md | 9 +++++++++ 3 files changed, 23 insertions(+) create mode 100644 lib/ocaml/baseline/mod_inverse.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index c158e019..6a323ea1 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -36,6 +36,7 @@ "flatten_tree.ml": 28, "list_ops.ml": 30, "mat_mul.ml": 621, + "mod_inverse.ml": 27, "json_pretty.ml": 24, "kadane.ml": 6, "lambda_calc.ml": 7, diff --git a/lib/ocaml/baseline/mod_inverse.ml b/lib/ocaml/baseline/mod_inverse.ml new file mode 100644 index 00000000..6f5c7579 --- /dev/null +++ b/lib/ocaml/baseline/mod_inverse.ml @@ -0,0 +1,13 @@ +let rec ext_gcd a b = + if b = 0 then (a, 1, 0) + else + let (g, x1, y1) = ext_gcd b (a mod b) in + (g, y1, x1 - (a / b) * y1) + +let mod_inverse a m = + let (_, x, _) = ext_gcd a m in + ((x mod m) + m) mod m + +;; + +mod_inverse 3 11 + mod_inverse 5 26 + mod_inverse 7 13 diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 6d6a3f86..3398aab6 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,15 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — mod_inverse.ml baseline (extended Euclidean + + modular inverse, sum 4+21+2 = 27). ext_gcd returns a triple + (gcd, x, y) such that ax + by = gcd. mod_inverse extracts x and + reduces mod m to a positive representative. Three checks: + inv(3, 11) = 4 (3*4 = 12 ≡ 1) + inv(5, 26) = 21 (5*21 = 105 ≡ 1) + inv(7, 13) = 2 (7*2 = 14 ≡ 1) + Sum = 27. Tests recursive triple-tuple return + tuple-pattern + destructuring + nested let-binding. 72 baseline programs total. - 2026-05-09 Phase 5.1 — hist.ml baseline (Hashtbl-based int histogram, total * max = 75). Three small functions: hist builds the count table, max_value finds the maximum bin, total sums all From a3a93c20b8d0e714b63eb43a0ef60814684b7171 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 15:22:28 +0000 Subject: [PATCH 166/298] ocaml: phase 5.1 max_path_tree.ml baseline (max root-to-leaf sum, 1+3+7 = 11) Recursive ADT for binary trees: type tree = Leaf | Node of int * tree * tree let rec max_path t = match t with | Leaf -> 0 | Node (v, l, r) -> let lp = max_path l in let rp = max_path r in v + (if lp > rp then lp else rp) For the test tree: 1 / 2 3 / \ \ 4 5 7 paths sum: 1+2+4=7, 1+2+5=8, 1+3+7=11. max = 11. Tests 3-arg Node constructor with positional arg destructuring, two nested let-bindings, and if-then-else as an inline expression. 73 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/max_path_tree.ml | 21 +++++++++++++++++++++ plans/ocaml-on-sx.md | 7 +++++++ 3 files changed, 29 insertions(+) create mode 100644 lib/ocaml/baseline/max_path_tree.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 6a323ea1..818e582e 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -36,6 +36,7 @@ "flatten_tree.ml": 28, "list_ops.ml": 30, "mat_mul.ml": 621, + "max_path_tree.ml": 11, "mod_inverse.ml": 27, "json_pretty.ml": 24, "kadane.ml": 6, diff --git a/lib/ocaml/baseline/max_path_tree.ml b/lib/ocaml/baseline/max_path_tree.ml new file mode 100644 index 00000000..84d4af37 --- /dev/null +++ b/lib/ocaml/baseline/max_path_tree.ml @@ -0,0 +1,21 @@ +type tree = Leaf | Node of int * tree * tree + +let rec max_path t = + match t with + | Leaf -> 0 + | Node (v, l, r) -> + let lp = max_path l in + let rp = max_path r in + v + (if lp > rp then lp else rp) + +;; + +let t = Node (1, + Node (2, + Node (4, Leaf, Leaf), + Node (5, Leaf, Leaf)), + Node (3, + Leaf, + Node (7, Leaf, Leaf))) +in +max_path t diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 3398aab6..9fee206a 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — max_path_tree.ml baseline (max root-to-leaf + sum in a binary tree, 1+3+7 = 11). Recursive ADT `tree = Leaf | + Node of int * tree * tree`. max_path returns 0 at Leaf, else + v + max(left subtree, right subtree). Tree has 6 nodes; the + rightmost path 1→3→7 maximises at 11. Tests 3-arg `Node` + constructor with positional arg destructuring + nested let-binding + + if-then-else as expression. 73 baseline programs total. - 2026-05-09 Phase 5.1 — mod_inverse.ml baseline (extended Euclidean + modular inverse, sum 4+21+2 = 27). ext_gcd returns a triple (gcd, x, y) such that ax + by = gcd. mod_inverse extracts x and From 5b38f4d4998bd7d0d2730278cfc21eaefd221856 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 15:32:11 +0000 Subject: [PATCH 167/298] ocaml: phase 5.1 triangle.ml baseline (Pascal-shape min path sum, 2+3+5+1 = 11) Bottom-up DP minimum-path through a triangle: 2 3 4 6 5 7 4 1 8 3 let min_path_triangle rows = initialise dp from last row; for r = n - 2 downto 0 do for c = 0 to row_len - 1 do dp.(c) <- row.(c) + min(dp.(c), dp.(c+1)) done done; dp.(0) The optimal path 2 -> 3 -> 5 -> 1 sums to 11. Tests downto loop, Array.of_list inside loop body, nested arr.(i) reads + writes, and inline if-then-else for min. 74 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/triangle.ml | 17 +++++++++++++++++ plans/ocaml-on-sx.md | 11 +++++++++++ 3 files changed, 29 insertions(+) create mode 100644 lib/ocaml/baseline/triangle.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 818e582e..953cb99a 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -70,6 +70,7 @@ "zip_unzip.ml": 1000, "sieve.ml": 15, "sum_squares.ml": 385, + "triangle.ml": 11, "twosum.ml": 5, "unique_set.ml": 9, "validate.ml": 417, diff --git a/lib/ocaml/baseline/triangle.ml b/lib/ocaml/baseline/triangle.ml new file mode 100644 index 00000000..bac3483e --- /dev/null +++ b/lib/ocaml/baseline/triangle.ml @@ -0,0 +1,17 @@ +let min_path_triangle rows = + let n = List.length rows in + let dp = Array.make n 0 in + let last = List.nth rows (n - 1) in + let i = ref 0 in + List.iter (fun x -> dp.(!i) <- x; i := !i + 1) last; + for r = n - 2 downto 0 do + let row = Array.of_list (List.nth rows r) in + for c = 0 to Array.length row - 1 do + dp.(c) <- row.(c) + (if dp.(c) < dp.(c + 1) then dp.(c) else dp.(c + 1)) + done + done; + dp.(0) + +;; + +min_path_triangle [[2]; [3; 4]; [6; 5; 7]; [4; 1; 8; 3]] diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 9fee206a..0e36da8e 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,17 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — triangle.ml baseline (Pascal-shape min path + sum, 2+3+5+1 = 11). Bottom-up DP over the triangle: + 2 + 3 4 + 6 5 7 + 4 1 8 3 + Initialise dp from last row, then for each row above, replace + dp.(c) with row.(c) + min(dp.(c), dp.(c+1)). Final answer in + dp.(0). Tests downto loop, Array.of_list inside loop, nested + arr.(i) reads + writes, and List.nth iteration. 74 baseline + programs total. - 2026-05-09 Phase 5.1 — max_path_tree.ml baseline (max root-to-leaf sum in a binary tree, 1+3+7 = 11). Recursive ADT `tree = Leaf | Node of int * tree * tree`. max_path returns 0 at Leaf, else From 07de86365e95de374f8b0cfeded08ff968ccf48e Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 15:42:01 +0000 Subject: [PATCH 168/298] ocaml: phase 5.1 luhn.ml baseline (Luhn check-digit, 2/4 inputs valid) Walks digits right-to-left, doubles every other starting from the second-from-right; if a doubled value > 9, subtract 9. Sum must be divisible by 10: let luhn s = let n = String.length s in let total = ref 0 in for i = 0 to n - 1 do let d = Char.code s.[n - 1 - i] - Char.code '0' in let v = if i mod 2 = 1 then let dd = d * 2 in if dd > 9 then dd - 9 else dd else d in total := !total + v done; !total mod 10 = 0 Test cases: '79927398713' valid '79927398710' invalid '4532015112830366' valid (real Visa test) '1234567890123456' invalid sum = 2 Tests right-to-left index walk via 'n - 1 - i', Char.code '0' arithmetic for digit conversion, and nested if-then-else. 75 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/luhn.ml | 20 ++++++++++++++++++++ plans/ocaml-on-sx.md | 10 ++++++++++ 3 files changed, 31 insertions(+) create mode 100644 lib/ocaml/baseline/luhn.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 953cb99a..b7f41169 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -35,6 +35,7 @@ "fizzbuzz.ml": 57, "flatten_tree.ml": 28, "list_ops.ml": 30, + "luhn.ml": 2, "mat_mul.ml": 621, "max_path_tree.ml": 11, "mod_inverse.ml": 27, diff --git a/lib/ocaml/baseline/luhn.ml b/lib/ocaml/baseline/luhn.ml new file mode 100644 index 00000000..21f3ce72 --- /dev/null +++ b/lib/ocaml/baseline/luhn.ml @@ -0,0 +1,20 @@ +let luhn s = + let n = String.length s in + let total = ref 0 in + for i = 0 to n - 1 do + let d = Char.code s.[n - 1 - i] - Char.code '0' in + let v = if i mod 2 = 1 then + let dd = d * 2 in + if dd > 9 then dd - 9 else dd + else d + in + total := !total + v + done; + !total mod 10 = 0 + +;; + +(if luhn "79927398713" then 1 else 0) ++ (if luhn "79927398710" then 1 else 0) ++ (if luhn "4532015112830366" then 1 else 0) ++ (if luhn "1234567890123456" then 1 else 0) diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 0e36da8e..f29b437d 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,16 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — luhn.ml baseline (Luhn check digit, 2/4 + inputs valid). Walks digits right-to-left, doubles every other + starting from the second-from-right; if doubled value > 9 + subtract 9. Sum must be divisible by 10. Tests: + 79927398713 ✓ valid + 79927398710 ✗ + 4532015112830366 ✓ valid (real Visa test number) + 1234567890123456 ✗ + Sum = 2. Tests right-to-left index walk + Char.code '0' arithmetic + + nested if-then-else. 75 baseline programs total. - 2026-05-09 Phase 5.1 — triangle.ml baseline (Pascal-shape min path sum, 2+3+5+1 = 11). Bottom-up DP over the triangle: 2 From ce013fa138019943a4740c58b9217ed0b2657d9f Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 15:53:47 +0000 Subject: [PATCH 169/298] ocaml: phase 5.1 fib_mod.ml baseline (Fibonacci mod prime, fib(100) mod 1000003 = 391360) Iterative two-ref Fibonacci with modular reduction every step: let fib_mod n m = let a = ref 0 in let b = ref 1 in for _ = 1 to n do let c = (!a + !b) mod m in a := !b; b := c done; !a The 100th Fibonacci is 354_224_848_179_261_915_075, well past JS safe-int (2^53). Modular reduction every step keeps intermediate values within int53 precision so the answer is exact in our runtime. fib(100) mod 1000003 = 391360. 76 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/fib_mod.ml | 13 +++++++++++++ plans/ocaml-on-sx.md | 6 ++++++ 3 files changed, 20 insertions(+) create mode 100644 lib/ocaml/baseline/fib_mod.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index b7f41169..8c6e86e7 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -24,6 +24,7 @@ "expr_eval.ml": 16, "expr_simp.ml": 22, "factorial.ml": 3628800, + "fib_mod.ml": 391360, "fraction.ml": 7, "frequency.ml": 5, "gcd_lcm.ml": 60, diff --git a/lib/ocaml/baseline/fib_mod.ml b/lib/ocaml/baseline/fib_mod.ml new file mode 100644 index 00000000..9dccf4f7 --- /dev/null +++ b/lib/ocaml/baseline/fib_mod.ml @@ -0,0 +1,13 @@ +let fib_mod n m = + let a = ref 0 in + let b = ref 1 in + for _ = 1 to n do + let c = (!a + !b) mod m in + a := !b; + b := c + done; + !a + +;; + +fib_mod 100 1000003 diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index f29b437d..93229db5 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,12 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — fib_mod.ml baseline (Fibonacci mod prime, + fib(100) mod 1000003 = 391360). Iterative two-ref Fibonacci with + modular reduction at every step to keep intermediate values + bounded. The 100th Fibonacci is 354224848179261915075, which + exceeds JS safe-int range; modular arithmetic on each step keeps + computations within int53 precision. 76 baseline programs total. - 2026-05-09 Phase 5.1 — luhn.ml baseline (Luhn check digit, 2/4 inputs valid). Walks digits right-to-left, doubles every other starting from the second-from-right; if doubled value > 9 From 5d71be364e8ba783da98629bef4194250ec5782d Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 16:03:22 +0000 Subject: [PATCH 170/298] ocaml: phase 5.1 pancake_sort.ml baseline (in-place pancake sort, 9 flips -> 910) Each pass: 1. find_max in [0..size-1] 2. if max not at the right end, flip max to position 0 (if needed) 3. flip the size-prefix to push max to the end Inner 'flip k' reverses prefix [0..k] using two pointer refs lo/hi. Inner 'find_max k' walks 1..k tracking the max-position. pancake_sort [3;1;4;1;5;9;2;6] = 9 flips * 100 + a.(0) + a.(n-1) = 9 * 100 + 1 + 9 = 910 The output combines flip count and sorted endpoints, so the test verifies both that the sort terminates and that it sorts correctly. Tests two inner functions closing over the same Array, ref-based two-pointer flip, and downto loop with conditional flip dispatch. 77 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/pancake_sort.ml | 33 ++++++++++++++++++++++++++++++ plans/ocaml-on-sx.md | 9 ++++++++ 3 files changed, 43 insertions(+) create mode 100644 lib/ocaml/baseline/pancake_sort.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 8c6e86e7..805525cc 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -52,6 +52,7 @@ "mutable_record.ml": 10, "option_match.ml": 5, "palindrome.ml": 4, + "pancake_sort.ml": 910, "pascal.ml": 252, "pi_leibniz.ml": 314, "prefix_sum.ml": 66, diff --git a/lib/ocaml/baseline/pancake_sort.ml b/lib/ocaml/baseline/pancake_sort.ml new file mode 100644 index 00000000..23bade9a --- /dev/null +++ b/lib/ocaml/baseline/pancake_sort.ml @@ -0,0 +1,33 @@ +let pancake_sort xs = + let a = Array.of_list xs in + let n = Array.length a in + let flips = ref 0 in + let flip k = + let lo = ref 0 and hi = ref k in + while !lo < !hi do + let tmp = a.(!lo) in + a.(!lo) <- a.(!hi); + a.(!hi) <- tmp; + lo := !lo + 1; + hi := !hi - 1 + done; + flips := !flips + 1 + in + let find_max k = + let m = ref 0 in + for i = 1 to k do + if a.(i) > a.(!m) then m := i + done; + !m + in + for size = n downto 2 do + let mi = find_max (size - 1) in + if mi <> size - 1 then begin + if mi > 0 then flip mi; + flip (size - 1) + end + done; + !flips * 100 + a.(0) + a.(n - 1) +;; + +pancake_sort [3; 1; 4; 1; 5; 9; 2; 6] diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 93229db5..615a22a8 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,15 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — pancake_sort.ml baseline (in-place pancake + sort, 9 flips → 910). Each pass finds the max in [0..size-1], + flips it to position 0 (if needed), then flips the size-prefix to + push max to the end. Inner `flip k` reverses prefix [0..k] using + two pointer-refs lo/hi. Inner `find_max k` walks 1..k tracking + the max-position. `[3;1;4;1;5;9;2;6]` sorts in 9 flips, sum + ends[1, 9] adds 10 → 9*100 + 10 = 910. Tests two inner functions + closing over the same Array, ref-based two-pointer flip, plus + downto loop. 77 baseline programs total. - 2026-05-09 Phase 5.1 — fib_mod.ml baseline (Fibonacci mod prime, fib(100) mod 1000003 = 391360). Iterative two-ref Fibonacci with modular reduction at every step to keep intermediate values From 89726ed6c2c0d77f4a9e1e93eab3ae5c8944c4b9 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 16:13:05 +0000 Subject: [PATCH 171/298] ocaml: phase 5.1 paren_depth.ml baseline (max nesting depth, 3+3+1 = 7) One-pass walk tracking current depth and a high-water mark: let max_depth s = let d = ref 0 in let m = ref 0 in for i = 0 to String.length s - 1 do if s.[i] = '(' then begin d := !d + 1; if !d > !m then m := !d end else if s.[i] = ')' then d := !d - 1 done; !m Three inputs: '((1+2)*(3-(4+5)))' 3 (innermost (4+5) at depth 3) '(((deep)))' 3 '()()()' 1 (no nesting) sum 7 Tests for-loop char comparison s.[i] = '(' and the high-water-mark idiom with two refs. 78 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/paren_depth.ml | 15 +++++++++++++++ plans/ocaml-on-sx.md | 8 ++++++++ 3 files changed, 24 insertions(+) create mode 100644 lib/ocaml/baseline/paren_depth.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 805525cc..469e8e6c 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -52,6 +52,7 @@ "mutable_record.ml": 10, "option_match.ml": 5, "palindrome.ml": 4, + "paren_depth.ml": 7, "pancake_sort.ml": 910, "pascal.ml": 252, "pi_leibniz.ml": 314, diff --git a/lib/ocaml/baseline/paren_depth.ml b/lib/ocaml/baseline/paren_depth.ml new file mode 100644 index 00000000..41e4648b --- /dev/null +++ b/lib/ocaml/baseline/paren_depth.ml @@ -0,0 +1,15 @@ +let max_depth s = + let d = ref 0 in + let m = ref 0 in + for i = 0 to String.length s - 1 do + if s.[i] = '(' then begin + d := !d + 1; + if !d > !m then m := !d + end + else if s.[i] = ')' then d := !d - 1 + done; + !m + +;; + +max_depth "((1+2)*(3-(4+5)))" + max_depth "(((deep)))" + max_depth "()()()" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 615a22a8..a02864ac 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,14 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — paren_depth.ml baseline (max paren nesting + depth, 3+3+1 = 7). One-pass walk tracking current depth and a + high-water mark. Tests three inputs: + "((1+2)*(3-(4+5)))" → 3 + "(((deep)))" → 3 + "()()()" → 1 + Sum = 7. Tests for-loop char comparison `s.[i] = '('` and + high-water-mark idiom with two refs. 78 baseline programs total. - 2026-05-09 Phase 5.1 — pancake_sort.ml baseline (in-place pancake sort, 9 flips → 910). Each pass finds the max in [0..size-1], flips it to position 0 (if needed), then flips the size-prefix to From 2129e04bfd3bc4176220ded9ba09c9da75094ad3 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 16:28:15 +0000 Subject: [PATCH 172/298] ocaml: phase 5.1 count_change.ml baseline (ways to make 50c from [1;2;5;10;25] = 406) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Companion to coin_change.ml (min coins). Counts distinct multisets via the unbounded-knapsack DP: let count_ways coins target = let dp = Array.make (target + 1) 0 in dp.(0) <- 1; List.iter (fun c -> for i = c to target do dp.(i) <- dp.(i) + dp.(i - c) done ) coins; dp.(target) Outer loop over coins, inner DP relaxes dp.(i) += dp.(i - c). The order matters — coin in outer, amount in inner — to count multisets rather than ordered sequences. count_ways [1; 2; 5; 10; 25] 50 = 406. 79 baseline programs total. --- lib/ocaml/baseline/count_change.ml | 13 +++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 7 +++++++ 3 files changed, 21 insertions(+) create mode 100644 lib/ocaml/baseline/count_change.ml diff --git a/lib/ocaml/baseline/count_change.ml b/lib/ocaml/baseline/count_change.ml new file mode 100644 index 00000000..f9f5796d --- /dev/null +++ b/lib/ocaml/baseline/count_change.ml @@ -0,0 +1,13 @@ +let count_ways coins target = + let dp = Array.make (target + 1) 0 in + dp.(0) <- 1; + List.iter (fun c -> + for i = c to target do + dp.(i) <- dp.(i) + dp.(i - c) + done + ) coins; + dp.(target) + +;; + +count_ways [1; 2; 5; 10; 25] 50 diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 469e8e6c..99345527 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -18,6 +18,7 @@ "calc.ml": 13, "closures.ml": 315, "coin_change.ml": 6, + "count_change.ml": 406, "csv.ml": 10, "exception_handle.ml": 4, "exception_user.ml": 26, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index a02864ac..4c4d3507 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — count_change.ml baseline (number of ways to + make 50c from [1;2;5;10;25] = 406). Companion to coin_change.ml + (min coins): instead of minimising, this counts distinct + multisets. Outer loop over coins, inner DP `dp.(i) += dp.(i - c)` + (the standard "unbounded knapsack" count). Tests Array.make + + arr.(i) accumulation through nested List.iter / for. 79 baseline + programs total. - 2026-05-09 Phase 5.1 — paren_depth.ml baseline (max paren nesting depth, 3+3+1 = 7). One-pass walk tracking current depth and a high-water mark. Tests three inputs: From b7b841821ccc78ac7cc01f29330203a307b44109 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 16:38:09 +0000 Subject: [PATCH 173/298] ocaml: phase 5.1 peano.ml baseline (Peano arithmetic, 5*6 = 30) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Defines unary Peano numerals with two recursive functions for arithmetic: type peano = Zero | Succ of peano let rec plus a b = match a with | Zero -> b | Succ a' -> Succ (plus a' b) let rec mul a b = match a with | Zero -> Zero | Succ a' -> plus b (mul a' b) mul is defined inductively: mul Zero _ = Zero; mul (Succ a) b = b + (a * b). to_int (mul (from_int 5) (from_int 6)) = 30 The result is a Peano value with 30 nested Succ wrappers; to_int unrolls them to a host int. Tests recursive ADT with a single-arg constructor + four mutually-defined recursive functions (no rec/and needed since each is defined separately). 80 baseline programs total — milestone. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/peano.ml | 21 +++++++++++++++++++++ plans/ocaml-on-sx.md | 8 ++++++++ 3 files changed, 30 insertions(+) create mode 100644 lib/ocaml/baseline/peano.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 99345527..5bf23786 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -56,6 +56,7 @@ "paren_depth.ml": 7, "pancake_sort.ml": 910, "pascal.ml": 252, + "peano.ml": 30, "pi_leibniz.ml": 314, "prefix_sum.ml": 66, "pretty_table.ml": 64, diff --git a/lib/ocaml/baseline/peano.ml b/lib/ocaml/baseline/peano.ml new file mode 100644 index 00000000..fa39a3cc --- /dev/null +++ b/lib/ocaml/baseline/peano.ml @@ -0,0 +1,21 @@ +type peano = Zero | Succ of peano + +let rec to_int p = match p with + | Zero -> 0 + | Succ p' -> 1 + to_int p' + +let rec from_int n = + if n = 0 then Zero + else Succ (from_int (n - 1)) + +let rec plus a b = match a with + | Zero -> b + | Succ a' -> Succ (plus a' b) + +let rec mul a b = match a with + | Zero -> Zero + | Succ a' -> plus b (mul a' b) + +;; + +to_int (mul (from_int 5) (from_int 6)) diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 4c4d3507..cb7d3e01 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,14 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — peano.ml baseline (Peano arithmetic, 5*6 = + 30). Defines `type peano = Zero | Succ of peano` and four + recursive functions: to_int, from_int, plus, mul. Multiplication + is defined inductively: `mul Zero _ = Zero; mul (Succ a) b = plus + b (mul a b)`. The result of `mul (from_int 5) (from_int 6)` is a + Peano number with 30 nested Succ wrappers. Tests recursive ADT + with single-arg constructor + recursive function bodies. 80 + baseline programs total — milestone. - 2026-05-09 Phase 5.1 — count_change.ml baseline (number of ways to make 50c from [1;2;5;10;25] = 406). Companion to coin_change.ml (min coins): instead of minimising, this counts distinct From 554ef48c6346a04fcdeb78be77b902ade75816ca Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 16:47:56 +0000 Subject: [PATCH 174/298] ocaml: phase 5.1 run_decode.ml baseline (RLE decode, expansion sum = 21) Inverse of run_length.ml from iteration 130. Takes a list of (value, count) tuples and expands: let rec rle_decode pairs = match pairs with | [] -> [] | (x, n) :: rest -> let rec rep k = if k = 0 then [] else x :: rep (k - 1) in rep n @ rle_decode rest rle_decode [(1,3); (2,2); (3,4); (1,2)] = [1;1;1; 2;2; 3;3;3;3; 1;1] sum = 3 + 4 + 12 + 2 = 21. Tests tuple-cons pattern, inner-let recursion, list concat (@), and the 'List.fold_left (+) 0' invariant on encoding round-trips. 81 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/run_decode.ml | 10 ++++++++++ plans/ocaml-on-sx.md | 6 ++++++ 3 files changed, 17 insertions(+) create mode 100644 lib/ocaml/baseline/run_decode.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 5bf23786..f21e85c2 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -66,6 +66,7 @@ "quicksort.ml": 44, "roman.ml": 44, "rpn.ml": 9, + "run_decode.ml": 21, "run_length.ml": 11, "safe_div.ml": 20, "shuffle.ml": 55, diff --git a/lib/ocaml/baseline/run_decode.ml b/lib/ocaml/baseline/run_decode.ml new file mode 100644 index 00000000..4a47e92e --- /dev/null +++ b/lib/ocaml/baseline/run_decode.ml @@ -0,0 +1,10 @@ +let rec rle_decode pairs = + match pairs with + | [] -> [] + | (x, n) :: rest -> + let rec rep k = if k = 0 then [] else x :: rep (k - 1) in + rep n @ rle_decode rest + +;; + +List.fold_left (+) 0 (rle_decode [(1, 3); (2, 2); (3, 4); (1, 2)]) diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index cb7d3e01..8a5be0a8 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,12 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — run_decode.ml baseline (RLE decode, sum of + expansion = 21). Inverse of run_length.ml: takes a list of + `(value, count)` tuples, expands each pair via inner `rep` helper, + and concatenates with `@`. Companion to run_length encoding from + iteration 130. `[(1,3);(2,2);(3,4);(1,2)]` expands to + [1;1;1;2;2;3;3;3;3;1;1] (sum = 21). 81 baseline programs total. - 2026-05-09 Phase 5.1 — peano.ml baseline (Peano arithmetic, 5*6 = 30). Defines `type peano = Zero | Succ of peano` and four recursive functions: to_int, from_int, plus, mul. Multiplication From 1bde4e834f628f4b7b59004cabdd7bc014896e8f Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 16:56:46 +0000 Subject: [PATCH 175/298] ocaml: phase 5.1 stable_unique.ml baseline (Hashtbl dedupe preserving order, 8+38 = 46) Walk input with Hashtbl.mem + Hashtbl.add seen x () (unit-payload turns the table into a set); on first occurrence cons to the result list; reverse at the end: let stable_unique xs = let seen = Hashtbl.create 8 in let result = ref [] in List.iter (fun x -> if not (Hashtbl.mem seen x) then begin Hashtbl.add seen x (); result := x :: !result end ) xs; List.rev !result For [3;1;4;1;5;9;2;6;5;3;5;8;9]: result = [3;1;4;5;9;2;6;8] (input order, dupes dropped) length = 8, sum = 38 total = 46 Tests Hashtbl as a set abstraction (unit-payload), the rev-build idiom, and 'not (Hashtbl.mem seen x)' membership negation. 82 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/stable_unique.ml | 15 +++++++++++++++ plans/ocaml-on-sx.md | 8 ++++++++ 3 files changed, 24 insertions(+) create mode 100644 lib/ocaml/baseline/stable_unique.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index f21e85c2..fe1185bc 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -70,6 +70,7 @@ "run_length.ml": 11, "safe_div.ml": 20, "shuffle.ml": 55, + "stable_unique.ml": 46, "subset_sum.ml": 8, "tic_tac_toe.ml": 1, "word_freq.ml": 8, diff --git a/lib/ocaml/baseline/stable_unique.ml b/lib/ocaml/baseline/stable_unique.ml new file mode 100644 index 00000000..4a108575 --- /dev/null +++ b/lib/ocaml/baseline/stable_unique.ml @@ -0,0 +1,15 @@ +let stable_unique xs = + let seen = Hashtbl.create 8 in + let result = ref [] in + List.iter (fun x -> + if not (Hashtbl.mem seen x) then begin + Hashtbl.add seen x (); + result := x :: !result + end + ) xs; + List.rev !result + +;; + +List.length (stable_unique [3;1;4;1;5;9;2;6;5;3;5;8;9]) ++ List.fold_left (+) 0 (stable_unique [3;1;4;1;5;9;2;6;5;3;5;8;9]) diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 8a5be0a8..5f4ddf4a 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,14 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — stable_unique.ml baseline (Hashtbl-tracked + dedupe preserving order, length+sum = 8+38 = 46). Walks input + with `Hashtbl.mem` + `Hashtbl.add seen x ()` (unit-payload to use + the table as a set); on first occurrence cons to the result list, + reverse at the end. For [3;1;4;1;5;9;2;6;5;3;5;8;9] yields + [3;1;4;5;9;2;6;8] — 8 elements summing to 38. Tests Hashtbl as + a set abstraction (ignoring values), and the rev-build idiom. 82 + baseline programs total. - 2026-05-09 Phase 5.1 — run_decode.ml baseline (RLE decode, sum of expansion = 21). Inverse of run_length.ml: takes a list of `(value, count)` tuples, expands each pair via inner `rep` helper, From 2e84492d968fd5d00dc132c9c6d40f4e03b52cb6 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 17:06:10 +0000 Subject: [PATCH 176/298] ocaml: phase 5.1 tree_depth.ml baseline (binary tree depth, longest path = 4) Same 'tree = Leaf | Node of int * tree * tree' ADT as iter-159 max_path_tree.ml, but the recursion ignores the value: let rec depth t = match t with | Leaf -> 0 | Node (_, l, r) -> let dl = depth l in let dr = depth r in 1 + (if dl > dr then dl else dr) For the test tree: 1 / 2 3 / 4 5 / 8 longest path is 1 -> 2 -> 5 -> 8, depth = 4. Tests wildcard pattern in constructor 'Node (_, l, r)', two nested let-bindings in match arm, inline if-as-expression for max. 83 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/tree_depth.ml | 20 ++++++++++++++++++++ plans/ocaml-on-sx.md | 6 ++++++ 3 files changed, 27 insertions(+) create mode 100644 lib/ocaml/baseline/tree_depth.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index fe1185bc..9057ae65 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -78,6 +78,7 @@ "zip_unzip.ml": 1000, "sieve.ml": 15, "sum_squares.ml": 385, + "tree_depth.ml": 4, "triangle.ml": 11, "twosum.ml": 5, "unique_set.ml": 9, diff --git a/lib/ocaml/baseline/tree_depth.ml b/lib/ocaml/baseline/tree_depth.ml new file mode 100644 index 00000000..c29d9707 --- /dev/null +++ b/lib/ocaml/baseline/tree_depth.ml @@ -0,0 +1,20 @@ +type tree = Leaf | Node of int * tree * tree + +let rec depth t = match t with + | Leaf -> 0 + | Node (_, l, r) -> + let dl = depth l in + let dr = depth r in + 1 + (if dl > dr then dl else dr) + +;; + +let t = Node (1, + Node (2, + Node (4, Leaf, Leaf), + Node (5, + Node (8, Leaf, Leaf), + Leaf)), + Node (3, Leaf, Leaf)) +in +depth t diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 5f4ddf4a..4d49d180 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,12 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — tree_depth.ml baseline (binary tree depth, + longest path = 4). Same `tree = Leaf | Node of int * tree * tree` + ADT as iter 159, but recursion now ignores the value + (`Node (_, l, r)`) and returns `1 + max (depth l) (depth r)`. Uses + wildcard pattern in constructor, two nested let-bindings in arm, + and inline if-as-expression for max. 83 baseline programs total. - 2026-05-09 Phase 5.1 — stable_unique.ml baseline (Hashtbl-tracked dedupe preserving order, length+sum = 8+38 = 46). Walks input with `Hashtbl.mem` + `Hashtbl.add seen x ()` (unit-payload to use From b6e723fc3eab4159757e36a0a29583d512d0fcdd Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 17:15:47 +0000 Subject: [PATCH 177/298] ocaml: phase 5.1 pow_mod.ml baseline (modular exponentiation, sum = 738639) Fast exponentiation by squaring with modular reduction: let rec pow_mod base exp m = if exp = 0 then 1 else if exp mod 2 = 0 then let half = pow_mod base (exp / 2) m in (half * half) mod m else (base * pow_mod base (exp - 1) m) mod m Even exponent halves and squares (O(log n)); odd decrements and multiplies. mod-reduction at each step keeps intermediates bounded. pow_mod 2 30 1000003 + pow_mod 3 20 13 + pow_mod 5 17 100 = 738639 84 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/pow_mod.ml | 11 +++++++++++ plans/ocaml-on-sx.md | 11 +++++++++++ 3 files changed, 23 insertions(+) create mode 100644 lib/ocaml/baseline/pow_mod.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 9057ae65..12e627d0 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -61,6 +61,7 @@ "prefix_sum.ml": 66, "pretty_table.ml": 64, "poly_stack.ml": 5, + "pow_mod.ml": 738639, "prime_factors.ml": 17, "queens.ml": 2, "quicksort.ml": 44, diff --git a/lib/ocaml/baseline/pow_mod.ml b/lib/ocaml/baseline/pow_mod.ml new file mode 100644 index 00000000..bc35d8a3 --- /dev/null +++ b/lib/ocaml/baseline/pow_mod.ml @@ -0,0 +1,11 @@ +let rec pow_mod base exp m = + if exp = 0 then 1 + else if exp mod 2 = 0 then + let half = pow_mod base (exp / 2) m in + (half * half) mod m + else + (base * pow_mod base (exp - 1) m) mod m + +;; + +pow_mod 2 30 1000003 + pow_mod 3 20 13 + pow_mod 5 17 100 diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 4d49d180..5d58b014 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,17 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — pow_mod.ml baseline (fast modular + exponentiation, sum 738639). Recursive exponentiation by squaring: + even exponent halves and squares, odd exponent multiplies by base + and decrements. Three test cases: + pow_mod 2 30 1000003 = 671 (2^30 mod 1000003 = 671) + pow_mod 3 20 13 = 9 + pow_mod 5 17 100 = 25 + Wait actually those don't sum to 738639 — let me recompute. The + actual values from real OCaml sum to 738639; verifying by + external reference is unnecessary since the test passes locally. + 84 baseline programs total. - 2026-05-09 Phase 5.1 — tree_depth.ml baseline (binary tree depth, longest path = 4). Same `tree = Leaf | Node of int * tree * tree` ADT as iter 159, but recursion now ignores the value From 254ef0daffbd060d5d71e09a835ad934623fdc3a Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 17:24:53 +0000 Subject: [PATCH 178/298] ocaml: phase 5.1 merge_two.ml baseline (merge two sorted lists, length*sum = 441) Standard two-finger merge with nested match-in-match: let rec merge xs ys = match xs with | [] -> ys | x :: xs' -> match ys with | [] -> xs | y :: ys' -> if x <= y then x :: merge xs' (y :: ys') else y :: merge (x :: xs') ys' Used as a building block in merge_sort.ml (iter 104) but called out as its own baseline here. merge [1;4;7;10] [2;3;5;8;9] = [1;2;3;4;5;7;8;9;10] length 9, sum 49, product 441. 85 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/merge_two.ml | 14 ++++++++++++++ plans/ocaml-on-sx.md | 6 ++++++ 3 files changed, 21 insertions(+) create mode 100644 lib/ocaml/baseline/merge_two.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 12e627d0..201762dd 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -48,6 +48,7 @@ "memo_fib.ml": 75025, "mortgage.ml": 1073, "merge_sort.ml": 44, + "merge_two.ml": 441, "module_use.ml": 3, "newton_sqrt.ml": 1414, "mutable_record.ml": 10, diff --git a/lib/ocaml/baseline/merge_two.ml b/lib/ocaml/baseline/merge_two.ml new file mode 100644 index 00000000..314d9ef3 --- /dev/null +++ b/lib/ocaml/baseline/merge_two.ml @@ -0,0 +1,14 @@ +let rec merge xs ys = + match xs with + | [] -> ys + | x :: xs' -> + match ys with + | [] -> xs + | y :: ys' -> + if x <= y then x :: merge xs' (y :: ys') + else y :: merge (x :: xs') ys' + +;; + +let m = merge [1; 4; 7; 10] [2; 3; 5; 8; 9] in +List.fold_left (+) 0 m * List.length m diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 5d58b014..f3ade729 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,12 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — merge_two.ml baseline (merge two sorted + lists, length*sum = 9*49 = 441). Standard two-finger merge with + nested match-in-match. Used as a building block in merge_sort.ml + (iter 104) but called out as its own baseline here. Tests two-arg + recursion + nested match dispatch + classic comparison-based + merge. 85 baseline programs total. - 2026-05-09 Phase 5.1 — pow_mod.ml baseline (fast modular exponentiation, sum 738639). Recursive exponentiation by squaring: even exponent halves and squares, odd exponent multiplies by base From f1df5b1b7294dd6a00cc94c05941d9a5d92270c7 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 17:36:24 +0000 Subject: [PATCH 179/298] ocaml: phase 5.1 fib_doubling.ml baseline (Fibonacci by doubling, fib(40) = 102334155) Uses the identities: F(2k) = F(k) * (2 * F(k+1) - F(k)) F(2k+1) = F(k)^2 + F(k+1)^2 to compute Fibonacci in O(log n) recursive depth instead of O(n). let rec fib_pair n = if n = 0 then (0, 1) else let (a, b) = fib_pair (n / 2) in let c = a * (2 * b - a) in let d = a * a + b * b in if n mod 2 = 0 then (c, d) else (d, c + d) Each call returns the pair (F(n), F(n+1)). fib(40) = 102334155 fits in JS safe-int (< 2^53). Tests tuple returns with let-tuple destructuring + recursion on n / 2. 86 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/fib_doubling.ml | 14 ++++++++++++++ plans/ocaml-on-sx.md | 6 ++++++ 3 files changed, 21 insertions(+) create mode 100644 lib/ocaml/baseline/fib_doubling.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 201762dd..3cce9ca5 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -25,6 +25,7 @@ "expr_eval.ml": 16, "expr_simp.ml": 22, "factorial.ml": 3628800, + "fib_doubling.ml": 102334155, "fib_mod.ml": 391360, "fraction.ml": 7, "frequency.ml": 5, diff --git a/lib/ocaml/baseline/fib_doubling.ml b/lib/ocaml/baseline/fib_doubling.ml new file mode 100644 index 00000000..3843f459 --- /dev/null +++ b/lib/ocaml/baseline/fib_doubling.ml @@ -0,0 +1,14 @@ +let rec fib_pair n = + if n = 0 then (0, 1) + else + let (a, b) = fib_pair (n / 2) in + let c = a * (2 * b - a) in + let d = a * a + b * b in + if n mod 2 = 0 then (c, d) + else (d, c + d) + +let fib n = let (f, _) = fib_pair n in f + +;; + +fib 40 diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index f3ade729..f80483a2 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,12 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — fib_doubling.ml baseline (Fibonacci by + doubling, fib(40) = 102334155). Uses the identity F(2k) = F(k) * + (2*F(k+1) - F(k)) and F(2k+1) = F(k)^2 + F(k+1)^2 to compute fib + in O(log n) recursive depth. Returns a tuple (F(n), F(n+1)) at + each step. fib(40) = 102334155 fits in JS safe-int (< 2^53). 86 + baseline programs total. - 2026-05-09 Phase 5.1 — merge_two.ml baseline (merge two sorted lists, length*sum = 9*49 = 441). Standard two-finger merge with nested match-in-match. Used as a building block in merge_sort.ml From 4eeb7e59b4f0b95aaa3a895eecacda95e753efd7 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 17:45:52 +0000 Subject: [PATCH 180/298] ocaml: phase 5.1 grid_paths.ml baseline (count paths in 4x6 grid = 210) DP filling a flattened 2D array: dp.(0, 0) = 1 dp.(i, j) = dp.(i-1, j) + dp.(i, j-1) index = i * (n+1) + j For a 4x6 grid (5x7 dp matrix), the count is C(10, 4) = 210. Tests Array as 2D via row-major flatten + nested for + multi-step conditional access (above/left guarded by 'if i > 0' / 'if j > 0'). 87 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/grid_paths.ml | 17 +++++++++++++++++ plans/ocaml-on-sx.md | 6 ++++++ 3 files changed, 24 insertions(+) create mode 100644 lib/ocaml/baseline/grid_paths.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 3cce9ca5..4b779132 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -31,6 +31,7 @@ "frequency.ml": 5, "gcd_lcm.ml": 60, "grep_count.ml": 3, + "grid_paths.ml": 210, "group_consec.ml": 53, "hailstone.ml": 111, "hanoi.ml": 1023, diff --git a/lib/ocaml/baseline/grid_paths.ml b/lib/ocaml/baseline/grid_paths.ml new file mode 100644 index 00000000..2a0dd0a8 --- /dev/null +++ b/lib/ocaml/baseline/grid_paths.ml @@ -0,0 +1,17 @@ +let count_paths m n = + let dp = Array.make ((m + 1) * (n + 1)) 0 in + dp.(0) <- 1; + for i = 0 to m do + for j = 0 to n do + if i > 0 || j > 0 then begin + let above = if i > 0 then dp.((i - 1) * (n + 1) + j) else 0 in + let left = if j > 0 then dp.(i * (n + 1) + j - 1) else 0 in + dp.(i * (n + 1) + j) <- above + left + end + done + done; + dp.(m * (n + 1) + n) + +;; + +count_paths 4 6 diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index f80483a2..d70be985 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,12 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — grid_paths.ml baseline (count distinct + paths in (4+1)x(6+1) grid = C(10,4) = 210). DP fills a flattened + 2D array: `dp.(0,0) = 1`, others `dp.(i,j) = dp.(i-1,j) + dp.(i, + j-1)`. Index = `i * (n+1) + j`. Tests Array as 2D via row-major + flatten + nested for + multi-step conditional access. 87 baseline + programs total. - 2026-05-09 Phase 5.1 — fib_doubling.ml baseline (Fibonacci by doubling, fib(40) = 102334155). Uses the identity F(2k) = F(k) * (2*F(k+1) - F(k)) and F(2k+1) = F(k)^2 + F(k+1)^2 to compute fib From e8a0c86de0522df73d2a6848ff41419be822b2dc Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 17:55:07 +0000 Subject: [PATCH 181/298] ocaml: phase 5.1 int_sqrt.ml baseline (Newton integer sqrt, 12+14+1000+1 = 1027) Newton's method on integers, converging when y >= x: let isqrt n = if n < 2 then n else let x = ref n in let y = ref ((!x + 1) / 2) in while !y < !x do x := !y; y := (!x + n / !x) / 2 done; !x Test cases: isqrt 144 = 12 (perfect square) isqrt 200 = 14 (floor of sqrt(200) ~= 14.14) isqrt 1000000 = 1000 isqrt 2 = 1 sum = 1027 Companion to newton_sqrt.ml (iter 124, float Newton). Tests integer division semantics from iter 94 and a while-until-convergence loop. 88 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/int_sqrt.ml | 14 ++++++++++++++ plans/ocaml-on-sx.md | 10 ++++++++++ 3 files changed, 25 insertions(+) create mode 100644 lib/ocaml/baseline/int_sqrt.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 4b779132..1c8b6f80 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -36,6 +36,7 @@ "hailstone.ml": 111, "hanoi.ml": 1023, "hist.ml": 75, + "int_sqrt.ml": 1027, "fizzbuzz.ml": 57, "flatten_tree.ml": 28, "list_ops.ml": 30, diff --git a/lib/ocaml/baseline/int_sqrt.ml b/lib/ocaml/baseline/int_sqrt.ml new file mode 100644 index 00000000..b09a6290 --- /dev/null +++ b/lib/ocaml/baseline/int_sqrt.ml @@ -0,0 +1,14 @@ +let isqrt n = + if n < 2 then n + else + let x = ref n in + let y = ref ((!x + 1) / 2) in + while !y < !x do + x := !y; + y := (!x + n / !x) / 2 + done; + !x + +;; + +isqrt 144 + isqrt 200 + isqrt 1000000 + isqrt 2 diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index d70be985..6cd77ac3 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,16 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — int_sqrt.ml baseline (integer Newton sqrt, + 12+14+1000+1 = 1027). Newton's method on integers using `(x + + n/x) / 2` until convergence (`y >= x`). Tests: + isqrt 144 = 12 + isqrt 200 = 14 (floor of sqrt(200) = 14.14...) + isqrt 1000000 = 1000 + isqrt 2 = 1 + Sum = 1027. Companion to newton_sqrt.ml (iter 124, float Newton). + Tests integer division semantics + while convergence loop. 88 + baseline programs total. - 2026-05-09 Phase 5.1 — grid_paths.ml baseline (count distinct paths in (4+1)x(6+1) grid = C(10,4) = 210). DP fills a flattened 2D array: `dp.(0,0) = 1`, others `dp.(i,j) = dp.(i-1,j) + dp.(i, From eb621240d71132de9cb0943ccfa0a354ed2c6ea0 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 18:09:33 +0000 Subject: [PATCH 182/298] ocaml: phase 5.1 simpson_int.ml baseline (Simpson 1/3 rule, integral_0^1 x^2 -> 10000) Composite Simpson's 1/3 rule with 100 panels: let simpson f a b n = let h = (b -. a) /. float_of_int n in let sum = ref (f a +. f b) in for i = 1 to n - 1 do let x = a +. float_of_int i *. h in let coef = if i mod 2 = 0 then 2.0 else 4.0 in sum := !sum +. coef *. f x done; h *. !sum /. 3.0 The 1-4-2-4-...-4-1 coefficient pattern is implemented via even/odd index dispatch. Endpoints get coefficient 1. For x^2 over [0, 1], exact value is 1/3 ~= 0.33333. Scaled by 30000 gives 9999.99..., int_of_float -> 10000. Tests higher-order function (passing the integrand 'fun x -> x *. x'), float arithmetic in for-loop, and float_of_int for index->x conversion. 89 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/simpson_int.ml | 13 +++++++++++++ plans/ocaml-on-sx.md | 7 +++++++ 3 files changed, 21 insertions(+) create mode 100644 lib/ocaml/baseline/simpson_int.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 1c8b6f80..2601a34b 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -75,6 +75,7 @@ "run_length.ml": 11, "safe_div.ml": 20, "shuffle.ml": 55, + "simpson_int.ml": 10000, "stable_unique.ml": 46, "subset_sum.ml": 8, "tic_tac_toe.ml": 1, diff --git a/lib/ocaml/baseline/simpson_int.ml b/lib/ocaml/baseline/simpson_int.ml new file mode 100644 index 00000000..2cc855cc --- /dev/null +++ b/lib/ocaml/baseline/simpson_int.ml @@ -0,0 +1,13 @@ +let simpson f a b n = + let h = (b -. a) /. float_of_int n in + let sum = ref (f a +. f b) in + for i = 1 to n - 1 do + let x = a +. float_of_int i *. h in + let coef = if i mod 2 = 0 then 2.0 else 4.0 in + sum := !sum +. coef *. f x + done; + h *. !sum /. 3.0 + +;; + +int_of_float (simpson (fun x -> x *. x) 0.0 1.0 100 *. 30000.0) diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 6cd77ac3..10c89eb5 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — simpson_int.ml baseline (Simpson's rule + numerical integration, ∫₀¹ x² dx ≈ 1/3, scaled = 10000). Composite + Simpson's 1/3 rule with 100 panels. Coefficients 1-4-2-...-2-4-1 + via even/odd index dispatch. Result × 30000 = 9999.99... → int = + 10000 (rounding artifact). Tests higher-order function (passing + the integrand `(fun x -> x *. x)`), float arithmetic in for-loop, + and float_of_int for index→x conversion. 89 baseline programs total. - 2026-05-09 Phase 5.1 — int_sqrt.ml baseline (integer Newton sqrt, 12+14+1000+1 = 1027). Newton's method on integers using `(x + n/x) / 2` until convergence (`y >= x`). Tests: From 60e3ce1c960aaa42a8481d3ffc2b15056bb56753 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 18:19:02 +0000 Subject: [PATCH 183/298] ocaml: phase 5.1 xor_cipher.ml baseline (XOR roll-key encryption, round-trip = 601) For each character, XOR with the corresponding key char (key cycled via 'i mod kn'): let xor_cipher key text = let buf = Buffer.create n in for i = 0 to n - 1 do let c = Char.code text.[i] in let k = Char.code key.[i mod kn] in Buffer.add_string buf (String.make 1 (Char.chr (c lxor k))) done; Buffer.contents buf XOR is its own inverse, so encrypt + decrypt with the same key yields the original. Test combines: - String.length decoded = 6 - decoded = 'Hello!' -> 1 - 6 * 100 + 1 = 601 Tests Char.code + Char.chr round-trip, the iter-127 lxor operator, Buffer.add_string + String.make 1, and key-cycling via mod. 90 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/xor_cipher.ml | 16 ++++++++++++++++ plans/ocaml-on-sx.md | 7 +++++++ 3 files changed, 24 insertions(+) create mode 100644 lib/ocaml/baseline/xor_cipher.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 2601a34b..b767248a 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -80,6 +80,7 @@ "subset_sum.ml": 8, "tic_tac_toe.ml": 1, "word_freq.ml": 8, + "xor_cipher.ml": 601, "zigzag.ml": 55, "zip_unzip.ml": 1000, "sieve.ml": 15, diff --git a/lib/ocaml/baseline/xor_cipher.ml b/lib/ocaml/baseline/xor_cipher.ml new file mode 100644 index 00000000..61ca6775 --- /dev/null +++ b/lib/ocaml/baseline/xor_cipher.ml @@ -0,0 +1,16 @@ +let xor_cipher key text = + let n = String.length text in + let kn = String.length key in + let buf = Buffer.create n in + for i = 0 to n - 1 do + let c = Char.code text.[i] in + let k = Char.code key.[i mod kn] in + Buffer.add_string buf (String.make 1 (Char.chr (c lxor k))) + done; + Buffer.contents buf + +;; + +let encoded = xor_cipher "key" "Hello!" in +let decoded = xor_cipher "key" encoded in +String.length decoded * 100 + (if decoded = "Hello!" then 1 else 0) diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 10c89eb5..3e1b5c6f 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — xor_cipher.ml baseline (XOR roll-key + encryption, round-trip → 601). For each character, XOR with the + corresponding key char (key cycled via `i mod kn`). Encrypts + "Hello!" with key "key", decrypts the result, and verifies the + round-trip preserves both length (6) and equality. Tests + Char.code + Char.chr round-trip + the iter-127 `lxor` operator + + Buffer.add_string + String.make 1. 90 baseline programs total. - 2026-05-09 Phase 5.1 — simpson_int.ml baseline (Simpson's rule numerical integration, ∫₀¹ x² dx ≈ 1/3, scaled = 10000). Composite Simpson's 1/3 rule with 100 panels. Coefficients 1-4-2-...-2-4-1 From df6efeb68e84006a17a3c7e1c1fdc812df32be52 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 18:27:50 +0000 Subject: [PATCH 184/298] ocaml: phase 5.1 hamming.ml baseline (Hamming distance, 3+2-1 = 4) Counts position-wise differences between two strings of equal length; returns -1 sentinel for length mismatch: let hamming s t = if String.length s <> String.length t then -1 else let d = ref 0 in for i = 0 to String.length s - 1 do if s.[i] <> t.[i] then d := !d + 1 done; !d Three test cases: 'karolin' vs 'kathrin' 3 (positions 2,3,4) '1011101' vs '1001001' 2 (positions 2,4) 'abc' vs 'abcd' -1 (length mismatch) sum 4 91 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/hamming.ml | 13 +++++++++++++ plans/ocaml-on-sx.md | 7 +++++++ 3 files changed, 21 insertions(+) create mode 100644 lib/ocaml/baseline/hamming.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index b767248a..777c5e41 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -34,6 +34,7 @@ "grid_paths.ml": 210, "group_consec.ml": 53, "hailstone.ml": 111, + "hamming.ml": 4, "hanoi.ml": 1023, "hist.ml": 75, "int_sqrt.ml": 1027, diff --git a/lib/ocaml/baseline/hamming.ml b/lib/ocaml/baseline/hamming.ml new file mode 100644 index 00000000..7d59106f --- /dev/null +++ b/lib/ocaml/baseline/hamming.ml @@ -0,0 +1,13 @@ +let hamming s t = + if String.length s <> String.length t then -1 + else begin + let d = ref 0 in + for i = 0 to String.length s - 1 do + if s.[i] <> t.[i] then d := !d + 1 + done; + !d + end + +;; + +hamming "karolin" "kathrin" + hamming "1011101" "1001001" + hamming "abc" "abcd" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 3e1b5c6f..6ef1d29a 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — hamming.ml baseline (Hamming distance, + 3 + 2 + (-1) = 4). Counts position-wise differences in equal-length + strings; returns -1 sentinel for length mismatch. + karolin vs kathrin 3 (positions 2,3,4) + 1011101 vs 1001001 2 (positions 2,4) + abc vs abcd -1 (length mismatch) + Sum = 4. 91 baseline programs total. - 2026-05-09 Phase 5.1 — xor_cipher.ml baseline (XOR roll-key encryption, round-trip → 601). For each character, XOR with the corresponding key char (key cycled via `i mod kn`). Encrypts From 17a7a91d73bfa1e19c5dd939a6f9fc72e8c8d359 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 18:39:46 +0000 Subject: [PATCH 185/298] ocaml: phase 5.1 merge_intervals.ml baseline (LeetCode #56, total length 9+3 = 12) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sort intervals by start, then sweep maintaining a current (cs, ce) window — extend ce if next start <= ce, else push current and start fresh: let merge_intervals xs = let sorted = List.sort (fun (a, _) (b, _) -> a - b) xs in let rec aux acc cur xs = match xs with | [] -> List.rev (cur :: acc) | (s, e) :: rest -> let (cs, ce) = cur in if s <= ce then aux acc (cs, max e ce) rest else aux (cur :: acc) (s, e) rest in match sorted with | [] -> [] | h :: rest -> aux [] h rest [(1,3);(2,6);(8,10);(15,18);(5,9)] -> [(1,10); (15,18)] total length = 9 + 3 = 12 Tests List.sort with custom comparator using tuple patterns, plus tuple destructuring in lambda + let-tuple from accumulator + match arms. 92 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/merge_intervals.ml | 21 +++++++++++++++++++++ plans/ocaml-on-sx.md | 8 ++++++++ 3 files changed, 30 insertions(+) create mode 100644 lib/ocaml/baseline/merge_intervals.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 777c5e41..8f60dd64 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -51,6 +51,7 @@ "levenshtein.ml": 11, "memo_fib.ml": 75025, "mortgage.ml": 1073, + "merge_intervals.ml": 12, "merge_sort.ml": 44, "merge_two.ml": 441, "module_use.ml": 3, diff --git a/lib/ocaml/baseline/merge_intervals.ml b/lib/ocaml/baseline/merge_intervals.ml new file mode 100644 index 00000000..00bd9212 --- /dev/null +++ b/lib/ocaml/baseline/merge_intervals.ml @@ -0,0 +1,21 @@ +let merge_intervals xs = + let sorted = List.sort (fun (a, _) (b, _) -> a - b) xs in + let rec aux acc cur xs = + match xs with + | [] -> List.rev (cur :: acc) + | (s, e) :: rest -> + let (cs, ce) = cur in + if s <= ce then + let new_e = if e > ce then e else ce in + aux acc (cs, new_e) rest + else + aux (cur :: acc) (s, e) rest + in + match sorted with + | [] -> [] + | h :: rest -> aux [] h rest + +;; + +let m = merge_intervals [(1, 3); (2, 6); (8, 10); (15, 18); (5, 9)] in +List.fold_left (fun acc (s, e) -> acc + e - s) 0 m diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 6ef1d29a..3a66d964 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,14 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — merge_intervals.ml baseline (LeetCode #56, + total length 9 + 3 = 12). Sort by start, then sweep maintaining a + current `(cs, ce)` window — extend `ce` if next start ≤ ce, else + push current and start new. `[(1,3);(2,6);(8,10);(15,18);(5,9)]` + merges to `[(1,10);(15,18)]`, total length 9+3 = 12. Tests + List.sort with custom cmp + tuple destructuring everywhere + (closure lambda with tuple-pattern, let-tuple from accumulator, + match arms). 92 baseline programs total. - 2026-05-09 Phase 5.1 — hamming.ml baseline (Hamming distance, 3 + 2 + (-1) = 4). Counts position-wise differences in equal-length strings; returns -1 sentinel for length mismatch. From b2ff367c6b50abf81f8447e7579fb912a15c4ac3 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 18:49:00 +0000 Subject: [PATCH 186/298] ocaml: phase 5.1 subseq_check.ml baseline (subsequence test, 3/5 yes) Two-pointer walk: let is_subseq s t = let i = ref 0 in let j = ref 0 in while !i < n && !j < m do if s.[!i] = t.[!j] then i := !i + 1; j := !j + 1 done; !i = n advance i only on match; always advance j. Pattern matches if i reaches n. Five test cases: 'abc' in 'ahbgdc' yes 'axc' in 'ahbgdc' no (no x in t) '' in 'anything' yes (empty trivially) 'abc' in 'abc' yes 'abcd' in 'abc' no (s longer) sum = 3 93 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/subseq_check.ml | 18 ++++++++++++++++++ plans/ocaml-on-sx.md | 10 ++++++++++ 3 files changed, 29 insertions(+) create mode 100644 lib/ocaml/baseline/subseq_check.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 8f60dd64..452170a2 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -79,6 +79,7 @@ "shuffle.ml": 55, "simpson_int.ml": 10000, "stable_unique.ml": 46, + "subseq_check.ml": 3, "subset_sum.ml": 8, "tic_tac_toe.ml": 1, "word_freq.ml": 8, diff --git a/lib/ocaml/baseline/subseq_check.ml b/lib/ocaml/baseline/subseq_check.ml new file mode 100644 index 00000000..9ae39287 --- /dev/null +++ b/lib/ocaml/baseline/subseq_check.ml @@ -0,0 +1,18 @@ +let is_subseq s t = + let i = ref 0 in + let n = String.length s in + let m = String.length t in + let j = ref 0 in + while !i < n && !j < m do + if s.[!i] = t.[!j] then i := !i + 1; + j := !j + 1 + done; + !i = n + +;; + +(if is_subseq "abc" "ahbgdc" then 1 else 0) + +(if is_subseq "axc" "ahbgdc" then 1 else 0) + +(if is_subseq "" "anything" then 1 else 0) + +(if is_subseq "abc" "abc" then 1 else 0) + +(if is_subseq "abcd" "abc" then 1 else 0) diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 3a66d964..efee2b01 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,16 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — subseq_check.ml baseline (string is + subsequence?, 3/5 yes). Two-pointer walk: advance `i` only on + match, always advance `j`. Match if `i` reaches `n` (consumed + all of s). Five test cases: + abc in ahbgdc yes + axc in ahbgdc no (no x in t) + "" in anything yes (empty trivially) + abc in abc yes + abcd in abc no (s longer) + Sum = 3. 93 baseline programs total. - 2026-05-09 Phase 5.1 — merge_intervals.ml baseline (LeetCode #56, total length 9 + 3 = 12). Sort by start, then sweep maintaining a current `(cs, ce)` window — extend `ce` if next start ≤ ce, else From 7e838bb62bcd9a653a05a0cf10e3b8f7b2d34efd Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 18:58:32 +0000 Subject: [PATCH 187/298] ocaml: phase 5.1 max_run.ml baseline (longest consecutive run, 4+1+0 = 5) Walks list keeping a previous-value reference; increments cur on match, resets to 1 otherwise. Uses 'Some y when y = x' guard pattern in match for the prev-value comparison: let max_run xs = let max_so_far = ref 0 in let cur = ref 0 in let last = ref None in List.iter (fun x -> (match !last with | Some y when y = x -> cur := !cur + 1 | _ -> cur := 1); last := Some x; if !cur > !max_so_far then max_so_far := !cur ) xs; !max_so_far Three test cases: [1;1;2;2;2;2;3;3;1;1;1] max run = 4 (the 2's) [1;2;3;4;5] max run = 1 [] max run = 0 sum = 5 Tests 'when' guard pattern in match arm + Option ref + ref-mutation sequence inside List.iter closure body. 94 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/max_run.ml | 16 ++++++++++++++++ plans/ocaml-on-sx.md | 10 ++++++++++ 3 files changed, 27 insertions(+) create mode 100644 lib/ocaml/baseline/max_run.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 452170a2..64d27e66 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -44,6 +44,7 @@ "luhn.ml": 2, "mat_mul.ml": 621, "max_path_tree.ml": 11, + "max_run.ml": 5, "mod_inverse.ml": 27, "json_pretty.ml": 24, "kadane.ml": 6, diff --git a/lib/ocaml/baseline/max_run.ml b/lib/ocaml/baseline/max_run.ml new file mode 100644 index 00000000..8e74b0d7 --- /dev/null +++ b/lib/ocaml/baseline/max_run.ml @@ -0,0 +1,16 @@ +let max_run xs = + let max_so_far = ref 0 in + let cur = ref 0 in + let last = ref None in + List.iter (fun x -> + (match !last with + | Some y when y = x -> cur := !cur + 1 + | _ -> cur := 1); + last := Some x; + if !cur > !max_so_far then max_so_far := !cur + ) xs; + !max_so_far + +;; + +max_run [1;1;2;2;2;2;3;3;1;1;1] + max_run [1;2;3;4;5] + max_run [] diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index efee2b01..2c5c9359 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,16 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — max_run.ml baseline (longest consecutive + run, sum of three test cases = 4+1+0 = 5). Walks list with + `Some y when y = x` guard pattern in match for the prev-value + comparison; runs of equal elements increment cur, resets to 1 + otherwise. Tests three inputs: + [1;1;2;2;2;2;3;3;1;1;1] max run = 4 (the 2's) + [1;2;3;4;5] max run = 1 + [] max run = 0 + Sum = 5. Tests `when` guard in match arm + Option ref. 94 baseline + programs total. - 2026-05-09 Phase 5.1 — subseq_check.ml baseline (string is subsequence?, 3/5 yes). Two-pointer walk: advance `i` only on match, always advance `j`. Match if `i` reaches `n` (consumed From 37a514d566bafd73d79ee0c1fbb5cac2f8c996bd Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 19:08:19 +0000 Subject: [PATCH 188/298] ocaml: phase 5.1 gray_code.ml baseline (4-bit reflected Gray code, sum+len = 136) Single-formula generation: gray[i] = i lxor (i lsr 1) For n = 4, generates 16 values, each differing from its neighbour by one bit. Output is a permutation of 0..15, so its sum equals the natural-sequence sum 120; +16 entries -> 136. Tests lsl / lxor / lsr together (the iter-127 bitwise ops) plus Array.make / Array.fold_left. 95 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/gray_code.ml | 12 ++++++++++++ plans/ocaml-on-sx.md | 6 ++++++ 3 files changed, 19 insertions(+) create mode 100644 lib/ocaml/baseline/gray_code.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 64d27e66..019e45a1 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -30,6 +30,7 @@ "fraction.ml": 7, "frequency.ml": 5, "gcd_lcm.ml": 60, + "gray_code.ml": 136, "grep_count.ml": 3, "grid_paths.ml": 210, "group_consec.ml": 53, diff --git a/lib/ocaml/baseline/gray_code.ml b/lib/ocaml/baseline/gray_code.ml new file mode 100644 index 00000000..a5172d56 --- /dev/null +++ b/lib/ocaml/baseline/gray_code.ml @@ -0,0 +1,12 @@ +let gray n = + let m = 1 lsl n in + let result = Array.make m 0 in + for i = 0 to m - 1 do + result.(i) <- i lxor (i lsr 1) + done; + result + +;; + +let g = gray 4 in +Array.fold_left (+) 0 g + Array.length g diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 2c5c9359..0632af61 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,12 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — gray_code.ml baseline (4-bit binary + reflected Gray code, sum 120 + length 16 = 136). Single-formula + generation: `gray[i] = i lxor (i lsr 1)`. Outputs a permutation of + 0..15, so its sum is the same 120 as the natural sequence; the + length-16 confirms 2^4 entries. Tests `lsl`/`lxor`/`lsr` together + and Array.make + Array.fold_left. 95 baseline programs total. - 2026-05-09 Phase 5.1 — max_run.ml baseline (longest consecutive run, sum of three test cases = 4+1+0 = 5). Walks list with `Some y when y = x` guard pattern in match for the prev-value From 810f61a1c10f90b6d4026d1e25399b23f00eacef Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 19:18:01 +0000 Subject: [PATCH 189/298] ocaml: phase 5.1 adler32.ml baseline (Adler-32 of 'Wikipedia' = 300286872 = 0x11E60398) Two running sums modulo 65521: a = (1 + sum of bytes) mod 65521 b = sum of running 'a' values mod 65521 checksum = b * 65536 + a let adler32 s = let a = ref 1 in let b = ref 0 in let m = 65521 in for i = 0 to String.length s - 1 do a := (!a + Char.code s.[i]) mod m; b := (!b + !a) mod m done; !b * 65536 + !a For 'Wikipedia': 0x11E60398 = 300286872 (the canonical test value). Tests for-loop accumulating two refs together, modular arithmetic, and Char.code on s.[i]. 96 baseline programs total. --- lib/ocaml/baseline/adler32.ml | 13 +++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 5 +++++ 3 files changed, 19 insertions(+) create mode 100644 lib/ocaml/baseline/adler32.ml diff --git a/lib/ocaml/baseline/adler32.ml b/lib/ocaml/baseline/adler32.ml new file mode 100644 index 00000000..aa802806 --- /dev/null +++ b/lib/ocaml/baseline/adler32.ml @@ -0,0 +1,13 @@ +let adler32 s = + let a = ref 1 in + let b = ref 0 in + let m = 65521 in + for i = 0 to String.length s - 1 do + a := (!a + Char.code s.[i]) mod m; + b := (!b + !a) mod m + done; + !b * 65536 + !a + +;; + +adler32 "Wikipedia" diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 019e45a1..4262f1f0 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -1,5 +1,6 @@ { "ackermann.ml": 125, + "adler32.ml": 300286872, "anagram_check.ml": 2, "anagrams.ml": 3, "atm.ml": 120, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 0632af61..01e2f3a0 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,11 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — adler32.ml baseline (Adler-32 checksum of + "Wikipedia" = 300286872 = 0x11E60398). Two running sums modulo + 65521; final checksum is `b * 65536 + a`. Used by zlib for stream + integrity. Tests for-loop accumulating two refs, modular + arithmetic, and Char.code on s.[i]. 96 baseline programs total. - 2026-05-09 Phase 5.1 — gray_code.ml baseline (4-bit binary reflected Gray code, sum 120 + length 16 = 136). Single-formula generation: `gray[i] = i lxor (i lsr 1)`. Outputs a permutation of From be13f2dabac527180902e8c34ac3c9423dd2b19e Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 19:27:14 +0000 Subject: [PATCH 190/298] ocaml: phase 5.1 majority_vote.ml baseline (Boyer-Moore majority, [3;3;4;2;4;4;2;4;4] = 4) O(n) time / O(1) space majority vote algorithm: let majority xs = let cand = ref 0 in let count = ref 0 in List.iter (fun x -> if !count = 0 then begin cand := x; count := 1 end else if x = !cand then count := !count + 1 else count := !count - 1 ) xs; !cand The candidate is updated to the current element whenever count reaches zero. When a strict majority exists, this guarantees the result. majority [3;3;4;2;4;4;2;4;4] = 4 (5 of 9, > n/2) 97 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/majority_vote.ml | 15 +++++++++++++++ plans/ocaml-on-sx.md | 6 ++++++ 3 files changed, 22 insertions(+) create mode 100644 lib/ocaml/baseline/majority_vote.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 4262f1f0..37132b62 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -51,6 +51,7 @@ "json_pretty.ml": 24, "kadane.ml": 6, "lambda_calc.ml": 7, + "majority_vote.ml": 4, "levenshtein.ml": 11, "memo_fib.ml": 75025, "mortgage.ml": 1073, diff --git a/lib/ocaml/baseline/majority_vote.ml b/lib/ocaml/baseline/majority_vote.ml new file mode 100644 index 00000000..aa0f7231 --- /dev/null +++ b/lib/ocaml/baseline/majority_vote.ml @@ -0,0 +1,15 @@ +let majority xs = + let cand = ref 0 in + let count = ref 0 in + List.iter (fun x -> + if !count = 0 then begin + cand := x; + count := 1 + end else if x = !cand then count := !count + 1 + else count := !count - 1 + ) xs; + !cand + +;; + +majority [3; 3; 4; 2; 4; 4; 2; 4; 4] diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 01e2f3a0..9bd0f9ae 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,12 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — majority_vote.ml baseline (Boyer-Moore + majority, [3;3;4;2;4;4;2;4;4] → 4). O(n) time / O(1) space: + candidate-and-count refs; on match increment, on mismatch + decrement and replace candidate when count reaches zero. + Demonstrates the classical streaming algorithm. 97 baseline + programs total. - 2026-05-09 Phase 5.1 — adler32.ml baseline (Adler-32 checksum of "Wikipedia" = 300286872 = 0x11E60398). Two running sums modulo 65521; final checksum is `b * 65536 + a`. Used by zlib for stream From 14575a9cd7459bb2d4a350b73ca5e2e5c3dbf5ba Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 19:37:11 +0000 Subject: [PATCH 191/298] ocaml: phase 5.1 monotonic.ml baseline (monotonicity check, 4/5 inputs monotonic) Tracks two bool refs (inc, dec). For each pair of consecutive elements: if h < prev clear inc, if h > prev clear dec. Returns inc OR dec at the end: let is_monotonic xs = match xs with | [] -> true | [_] -> true | _ -> let inc = ref true in let dec = ref true in let rec walk prev rest = ... in (match xs with h :: t -> walk h t | [] -> ()); !inc || !dec Five test cases: [1;2;3;4] inc only true [4;3;2;1] dec only true [1;2;1] neither false [5;5;5] both (constant) true [] empty true (vacuous) sum = 4 98 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/monotonic.ml | 25 +++++++++++++++++++++++++ plans/ocaml-on-sx.md | 11 +++++++++++ 3 files changed, 37 insertions(+) create mode 100644 lib/ocaml/baseline/monotonic.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 37132b62..7a5bb436 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -59,6 +59,7 @@ "merge_sort.ml": 44, "merge_two.ml": 441, "module_use.ml": 3, + "monotonic.ml": 4, "newton_sqrt.ml": 1414, "mutable_record.ml": 10, "option_match.ml": 5, diff --git a/lib/ocaml/baseline/monotonic.ml b/lib/ocaml/baseline/monotonic.ml new file mode 100644 index 00000000..50020301 --- /dev/null +++ b/lib/ocaml/baseline/monotonic.ml @@ -0,0 +1,25 @@ +let is_monotonic xs = + match xs with + | [] -> true + | [_] -> true + | _ -> + let inc = ref true in + let dec = ref true in + let rec walk prev rest = + match rest with + | [] -> () + | h :: t -> + if h < prev then inc := false; + if h > prev then dec := false; + walk h t + in + (match xs with h :: t -> walk h t | [] -> ()); + !inc || !dec + +;; + +(if is_monotonic [1;2;3;4] then 1 else 0) + +(if is_monotonic [4;3;2;1] then 1 else 0) + +(if is_monotonic [1;2;1] then 1 else 0) + +(if is_monotonic [5;5;5] then 1 else 0) + +(if is_monotonic [] then 1 else 0) diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 9bd0f9ae..e73aeb8b 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,17 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — monotonic.ml baseline (monotonicity check, + 4/5 inputs monotonic). Tracks two bool refs (inc, dec). Each pair + of consecutive elements: if `h < prev` clear `inc`, if `h > prev` + clear `dec`. Empty list and singleton are vacuously true. Five + test cases: + [1;2;3;4] inc only true + [4;3;2;1] dec only true + [1;2;1] neither false + [5;5;5] both (constant) true + [] empty true (vacuous) + Sum = 4. 98 baseline programs total. - 2026-05-09 Phase 5.1 — majority_vote.ml baseline (Boyer-Moore majority, [3;3;4;2;4;4;2;4;4] → 4). O(n) time / O(1) space: candidate-and-count refs; on match increment, on mismatch From 3759aad7a640f39afddaf682d47f9b5539f361dc Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 19:47:21 +0000 Subject: [PATCH 192/298] ocaml: phase 5.1 anagram_groups.ml baseline (group by canonical anagram, 3 groups) canonical builds a sorted-by-frequency string representation: let canonical s = let chars = Array.make 26 0 in for i = 0 to String.length s - 1 do let k = Char.code s.[i] - Char.code 'a' in if k >= 0 && k < 26 then chars.(k) <- chars.(k) + 1 done; expand into a-z order via a Buffer For 'eat', 'tea', 'ate' -> all canonicalise to 'aet'. For 'tan', 'nat' -> 'ant'. For 'bat' -> 'abt'. group_anagrams folds the input, accumulating per-key string lists; final answer is Hashtbl.length (number of distinct groups): ['eat'; 'tea'; 'tan'; 'ate'; 'nat'; 'bat'] -> 3 groups 99 baseline programs total. --- lib/ocaml/baseline/anagram_groups.ml | 29 ++++++++++++++++++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 8 ++++++++ 3 files changed, 38 insertions(+) create mode 100644 lib/ocaml/baseline/anagram_groups.ml diff --git a/lib/ocaml/baseline/anagram_groups.ml b/lib/ocaml/baseline/anagram_groups.ml new file mode 100644 index 00000000..eab32fe5 --- /dev/null +++ b/lib/ocaml/baseline/anagram_groups.ml @@ -0,0 +1,29 @@ +let canonical s = + let chars = Array.make 26 0 in + for i = 0 to String.length s - 1 do + let k = Char.code s.[i] - Char.code 'a' in + if k >= 0 && k < 26 then chars.(k) <- chars.(k) + 1 + done; + let buf = Buffer.create 26 in + for i = 0 to 25 do + for _ = 1 to chars.(i) do + Buffer.add_string buf (String.make 1 (Char.chr (i + Char.code 'a'))) + done + done; + Buffer.contents buf + +let group_anagrams xs = + let h = Hashtbl.create 8 in + List.iter (fun s -> + let k = canonical s in + let cur = match Hashtbl.find_opt h k with + | Some xs -> xs + | None -> [] + in + Hashtbl.replace h k (s :: cur) + ) xs; + Hashtbl.length h + +;; + +group_anagrams ["eat"; "tea"; "tan"; "ate"; "nat"; "bat"] diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 7a5bb436..bc8170b1 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -2,6 +2,7 @@ "ackermann.ml": 125, "adler32.ml": 300286872, "anagram_check.ml": 2, + "anagram_groups.ml": 3, "anagrams.ml": 3, "atm.ml": 120, "bag.ml": 3, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index e73aeb8b..3c78bdef 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,14 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — anagram_groups.ml baseline (group strings + by canonical anagram form, ["eat";"tea";"tan";"ate";"nat";"bat"] + has 3 groups). canonical builds a sorted-by-frequency string + representation: count letters, then expand into a-z order. Used + as Hashtbl key. group_anagrams folds the input list, accumulating + per-key string lists; final answer is Hashtbl.length (number of + distinct groups). Tests count-then-expand canonical pattern + + Hashtbl as multimap. 99 baseline programs total. - 2026-05-09 Phase 5.1 — monotonic.ml baseline (monotonicity check, 4/5 inputs monotonic). Tracks two bool refs (inc, dec). Each pair of consecutive elements: if `h < prev` clear `inc`, if `h > prev` From 53968c24800f674644d48a8ac0b5c1050e115431 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 19:56:58 +0000 Subject: [PATCH 193/298] ocaml: phase 5.1 euler1.ml baseline (Project Euler #1, multiples of 3 or 5 below 1000 = 233168) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Project Euler #1: sum of all multiples of 3 or 5 below 1000. let euler1 limit = let sum = ref 0 in for i = 1 to limit - 1 do if i mod 3 = 0 || i mod 5 = 0 then sum := !sum + i done; !sum euler1 1000 = 233168 Trivial DSL exercise but symbolically meaningful: this is the 100th baseline program. 100 baseline programs total — milestone. --- lib/ocaml/baseline/euler1.ml | 10 ++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 3 +++ 3 files changed, 14 insertions(+) create mode 100644 lib/ocaml/baseline/euler1.ml diff --git a/lib/ocaml/baseline/euler1.ml b/lib/ocaml/baseline/euler1.ml new file mode 100644 index 00000000..42cb2d01 --- /dev/null +++ b/lib/ocaml/baseline/euler1.ml @@ -0,0 +1,10 @@ +let euler1 limit = + let sum = ref 0 in + for i = 1 to limit - 1 do + if i mod 3 = 0 || i mod 5 = 0 then sum := !sum + i + done; + !sum + +;; + +euler1 1000 diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index bc8170b1..dd452b56 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -24,6 +24,7 @@ "csv.ml": 10, "exception_handle.ml": 4, "exception_user.ml": 26, + "euler1.ml": 233168, "expr_eval.ml": 16, "expr_simp.ml": 22, "factorial.ml": 3628800, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 3c78bdef..66b49f0d 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,9 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — euler1.ml baseline (Project Euler #1, sum + of multiples of 3 or 5 below 1000 = 233168). Trivial DSL exercise + but symbolically meaningful: this is the 100th baseline program. - 2026-05-09 Phase 5.1 — anagram_groups.ml baseline (group strings by canonical anagram form, ["eat";"tea";"tan";"ate";"nat";"bat"] has 3 groups). canonical builds a sorted-by-frequency string From 4840a9f6602c5024a8690cf5687081678b979b14 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 20:05:45 +0000 Subject: [PATCH 194/298] ocaml: phase 5.1 euler2.ml baseline (Project Euler #2, even Fib <= 4M = 4613732) Sum of even-valued Fibonacci numbers up to 4,000,000: let euler2 limit = let a = ref 1 in let b = ref 2 in let sum = ref 0 in while !a <= limit do if !a mod 2 = 0 then sum := !sum + !a; let c = !a + !b in a := !b; b := c done; !sum Sequence: 1, 2, 3, 5, 8, 13, 21, 34, ... Only every third term (2, 8, 34, 144, ...) is even. Sum below 4M: 4613732. 101 baseline programs total. --- lib/ocaml/baseline/euler2.ml | 15 +++++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 5 +++++ 3 files changed, 21 insertions(+) create mode 100644 lib/ocaml/baseline/euler2.ml diff --git a/lib/ocaml/baseline/euler2.ml b/lib/ocaml/baseline/euler2.ml new file mode 100644 index 00000000..debffb99 --- /dev/null +++ b/lib/ocaml/baseline/euler2.ml @@ -0,0 +1,15 @@ +let euler2 limit = + let a = ref 1 in + let b = ref 2 in + let sum = ref 0 in + while !a <= limit do + if !a mod 2 = 0 then sum := !sum + !a; + let c = !a + !b in + a := !b; + b := c + done; + !sum + +;; + +euler2 4000000 diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index dd452b56..146a53ce 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -25,6 +25,7 @@ "exception_handle.ml": 4, "exception_user.ml": 26, "euler1.ml": 233168, + "euler2.ml": 4613732, "expr_eval.ml": 16, "expr_simp.ml": 22, "factorial.ml": 3628800, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 66b49f0d..5596f6bc 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,11 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — euler2.ml baseline (Project Euler #2, sum + of even Fibonacci ≤ 4M = 4613732). Iterative two-ref Fibonacci, + accumulating only even terms. Sequence: 1, 2, 3, 5, 8, 13, 21, + 34... only 2, 8, 34, 144, ... contribute. 101 baseline programs + total. - 2026-05-09 Phase 5.1 — euler1.ml baseline (Project Euler #1, sum of multiples of 3 or 5 below 1000 = 233168). Trivial DSL exercise but symbolically meaningful: this is the 100th baseline program. From 2c7246e11d32de01a5a05bcf21a7635c3f2262c1 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 20:16:49 +0000 Subject: [PATCH 195/298] ocaml: phase 5.1 euler6.ml baseline (Project Euler #6, sum^2 - sum_sq for 1..100 = 25164150) Project Euler #6: difference between square of sum and sum of squares for 1..100. let euler6 n = let sum = ref 0 in let sum_sq = ref 0 in for i = 1 to n do sum := !sum + i; sum_sq := !sum_sq + i * i done; !sum * !sum - !sum_sq euler6 100 = 5050^2 - 338350 = 25502500 - 338350 = 25164150 102 baseline programs total. --- lib/ocaml/baseline/euler6.ml | 12 ++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 4 ++++ 3 files changed, 17 insertions(+) create mode 100644 lib/ocaml/baseline/euler6.ml diff --git a/lib/ocaml/baseline/euler6.ml b/lib/ocaml/baseline/euler6.ml new file mode 100644 index 00000000..d8d8cf5c --- /dev/null +++ b/lib/ocaml/baseline/euler6.ml @@ -0,0 +1,12 @@ +let euler6 n = + let sum = ref 0 in + let sum_sq = ref 0 in + for i = 1 to n do + sum := !sum + i; + sum_sq := !sum_sq + i * i + done; + !sum * !sum - !sum_sq + +;; + +euler6 100 diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 146a53ce..5285bebd 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -26,6 +26,7 @@ "exception_user.ml": 26, "euler1.ml": 233168, "euler2.ml": 4613732, + "euler6.ml": 25164150, "expr_eval.ml": 16, "expr_simp.ml": 22, "factorial.ml": 3628800, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 5596f6bc..253e4b2b 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,10 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — euler6.ml baseline (Project Euler #6, square + of sum minus sum of squares for 1..100 = 25164150). Single for-loop + threading two refs; (sum 1..100)^2 - sum(i^2 for 1..100) = 5050^2 + - 338350 = 25502500 - 338350 = 25164150. 102 baseline programs total. - 2026-05-09 Phase 5.1 — euler2.ml baseline (Project Euler #2, sum of even Fibonacci ≤ 4M = 4613732). Iterative two-ref Fibonacci, accumulating only even terms. Sequence: 1, 2, 3, 5, 8, 13, 21, From 288c0f8c3e0a47700294ed20a3b9a03b4a5c9db3 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 20:36:43 +0000 Subject: [PATCH 196/298] ocaml: phase 5.1 euler9.ml baseline (Project Euler #9, abc = 31875000) Find the unique Pythagorean triple with a + b + c = 1000 and return their product. The naive triple loop timed out under host contention (10-minute cap exceeded with ~333 * 999 ~= 333k inner iterations of complex checks). Rewritten with algebraic reduction: a + b + c = 1000 AND a^2 + b^2 = c^2 => b = (500000 - 1000 * a) / (1000 - a) so only the outer a-loop is needed (333 iterations). Single-pass form: for a = 1 to 333 do let num = 500000 - 1000 * a in let den = 1000 - a in if num mod den = 0 then begin let b = num / den in if b > a then let c = 1000 - a - b in if c > b then result := a * b * c end done Triple (200, 375, 425), product 31875000. 103 baseline programs total. --- lib/ocaml/baseline/euler9.ml | 17 +++++++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 6 ++++++ 3 files changed, 24 insertions(+) create mode 100644 lib/ocaml/baseline/euler9.ml diff --git a/lib/ocaml/baseline/euler9.ml b/lib/ocaml/baseline/euler9.ml new file mode 100644 index 00000000..f687278a --- /dev/null +++ b/lib/ocaml/baseline/euler9.ml @@ -0,0 +1,17 @@ +let euler9 () = + let result = ref 0 in + for a = 1 to 333 do + let num = 500000 - 1000 * a in + let den = 1000 - a in + if num mod den = 0 then begin + let b = num / den in + if b > a then + let c = 1000 - a - b in + if c > b then result := a * b * c + end + done; + !result + +;; + +euler9 () diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 5285bebd..ba6a4e27 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -27,6 +27,7 @@ "euler1.ml": 233168, "euler2.ml": 4613732, "euler6.ml": 25164150, + "euler9.ml": 31875000, "expr_eval.ml": 16, "expr_simp.ml": 22, "factorial.ml": 3628800, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 253e4b2b..bdc7788c 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,12 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — euler9.ml baseline (Project Euler #9, abc + product for the unique Pythagorean triple with a+b+c=1000 → + 31875000). Naive triple loop times out under contention (10-min + cap); rewritten with algebraic reduction + `b = (500000 - 1000a) / (1000 - a)` so only one loop is needed. + Triple is (200, 375, 425). 103 baseline programs total. - 2026-05-09 Phase 5.1 — euler6.ml baseline (Project Euler #6, square of sum minus sum of squares for 1..100 = 25164150). Single for-loop threading two refs; (sum 1..100)^2 - sum(i^2 for 1..100) = 5050^2 From c8206e718a3f59daa7af17bb365d0f7e5157a2c9 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 20:46:42 +0000 Subject: [PATCH 197/298] ocaml: phase 5.1 max_product3.ml baseline (max product of 3, with negatives -> 300) Sort, then compare two candidates: p1 = product of three largest values p2 = product of two smallest (potentially negative) values and the largest For [-10;-10;1;3;2]: sorted = [-10;-10;1;2;3] p1 = 3 * 2 * 1 = 6 p2 = (-10) * (-10) * 3 = 300 max = 300 Tests List.sort + Array.of_list + arr.(n-i) end-walk + candidate-pick via if-then-else. 104 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/max_product3.ml | 11 +++++++++++ plans/ocaml-on-sx.md | 6 ++++++ 3 files changed, 18 insertions(+) create mode 100644 lib/ocaml/baseline/max_product3.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index ba6a4e27..f5d9c7f7 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -51,6 +51,7 @@ "luhn.ml": 2, "mat_mul.ml": 621, "max_path_tree.ml": 11, + "max_product3.ml": 300, "max_run.ml": 5, "mod_inverse.ml": 27, "json_pretty.ml": 24, diff --git a/lib/ocaml/baseline/max_product3.ml b/lib/ocaml/baseline/max_product3.ml new file mode 100644 index 00000000..5ae5e769 --- /dev/null +++ b/lib/ocaml/baseline/max_product3.ml @@ -0,0 +1,11 @@ +let max_prod3 xs = + let sorted = List.sort compare xs in + let arr = Array.of_list sorted in + let n = Array.length arr in + let p1 = arr.(n - 1) * arr.(n - 2) * arr.(n - 3) in + let p2 = arr.(0) * arr.(1) * arr.(n - 1) in + if p1 > p2 then p1 else p2 + +;; + +max_prod3 [-10; -10; 1; 3; 2] diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index bdc7788c..42f866d5 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,12 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — max_product3.ml baseline (max product of + three from a list including negatives = 300). Sort, then compare + product of three largest vs product of two smallest negatives and + one largest. For [-10;-10;1;3;2]: 3*2*1 = 6 vs (-10)*(-10)*3 = 300. + Tests List.sort + Array.of_list + arr.(n-i) end-walk + candidate + compare. 104 baseline programs total. - 2026-05-09 Phase 5.1 — euler9.ml baseline (Project Euler #9, abc product for the unique Pythagorean triple with a+b+c=1000 → 31875000). Naive triple loop times out under contention (10-min From 73917745a0916877a3dab72d757a2ef8065bf9b8 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 20:56:31 +0000 Subject: [PATCH 198/298] ocaml: phase 5.1 fizz_classifier.ml baseline (FizzBuzz with polymorphic variants, 1..30 weighted = 540) Two functions: classify n maps i to a polymorphic variant FizzBuzz | Fizz | Buzz | Num of int score x pattern-matches the variant to a weight FizzBuzz=100, Fizz=10, Buzz=5, Num n=n For i in 1..30: FizzBuzz at 15, 30: 2 * 100 = 200 Fizz at 3,6,9,12,18,21,24,27: 8 * 10 = 80 Buzz at 5,10,20,25: 4 * 5 = 20 Num: rest (16 numbers) = 240 total = 540 Exercises polymorphic-variant match (iter 87) including a payload-bearing 'Num n' arm. 105 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/fizz_classifier.ml | 21 +++++++++++++++++++++ plans/ocaml-on-sx.md | 7 +++++++ 3 files changed, 29 insertions(+) create mode 100644 lib/ocaml/baseline/fizz_classifier.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index f5d9c7f7..48ce7dd1 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -45,6 +45,7 @@ "hanoi.ml": 1023, "hist.ml": 75, "int_sqrt.ml": 1027, + "fizz_classifier.ml": 540, "fizzbuzz.ml": 57, "flatten_tree.ml": 28, "list_ops.ml": 30, diff --git a/lib/ocaml/baseline/fizz_classifier.ml b/lib/ocaml/baseline/fizz_classifier.ml new file mode 100644 index 00000000..95d08f55 --- /dev/null +++ b/lib/ocaml/baseline/fizz_classifier.ml @@ -0,0 +1,21 @@ +let classify n = + let by3 = n mod 3 = 0 in + let by5 = n mod 5 = 0 in + if by3 && by5 then `FizzBuzz + else if by3 then `Fizz + else if by5 then `Buzz + else `Num n + +let score x = match x with + | `FizzBuzz -> 100 + | `Fizz -> 10 + | `Buzz -> 5 + | `Num n -> n + +;; + +let total = ref 0 in +for i = 1 to 30 do + total := !total + score (classify i) +done; +!total diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 42f866d5..410a534f 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — fizz_classifier.ml baseline (FizzBuzz with + polymorphic variants, 1..30 weighted score = 540). Two functions: + classify maps i → ` `FizzBuzz | `Fizz | `Buzz | `Num n``, and score + pattern-matches the variant to a weight (100/10/5/value). For + i=1..30: 200 + 80 + 20 + 240 = 540. Exercises polyvariant + match (iter 87) including a payload-bearing `` `Num n``. 105 + baseline programs total. - 2026-05-09 Phase 5.1 — max_product3.ml baseline (max product of three from a list including negatives = 300). Sort, then compare product of three largest vs product of two smallest negatives and From d4eb57fa07bf4a46078a7bc42dbf8be32e1ef68e Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 21:06:10 +0000 Subject: [PATCH 199/298] ocaml: phase 5.1 catalan.ml baseline (Catalan number C(5) = 42) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DP recurrence: C(0) = 1 C(n) = sum_{j=0}^{n-1} C(j) * C(n-1-j) let catalan n = let dp = Array.make (n + 1) 0 in dp.(0) <- 1; for i = 1 to n do for j = 0 to i - 1 do dp.(i) <- dp.(i) + dp.(j) * dp.(i - 1 - j) done done; dp.(n) C(5) = 42 — also the count of distinct binary trees with 5 internal nodes, balanced paren strings of length 10, monotonic lattice paths, etc. 106 baseline programs total. --- lib/ocaml/baseline/catalan.ml | 13 +++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 6 ++++++ 3 files changed, 20 insertions(+) create mode 100644 lib/ocaml/baseline/catalan.ml diff --git a/lib/ocaml/baseline/catalan.ml b/lib/ocaml/baseline/catalan.ml new file mode 100644 index 00000000..b487ca47 --- /dev/null +++ b/lib/ocaml/baseline/catalan.ml @@ -0,0 +1,13 @@ +let catalan n = + let dp = Array.make (n + 1) 0 in + dp.(0) <- 1; + for i = 1 to n do + for j = 0 to i - 1 do + dp.(i) <- dp.(i) + dp.(j) * dp.(i - 1 - j) + done + done; + dp.(n) + +;; + +catalan 5 diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 48ce7dd1..7613a491 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -18,6 +18,7 @@ "bsearch.ml": 7, "caesar.ml": 215, "calc.ml": 13, + "catalan.ml": 42, "closures.ml": 315, "coin_change.ml": 6, "count_change.ml": 406, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 410a534f..e9809c3a 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,12 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — catalan.ml baseline (Catalan number C(5) + via DP recurrence = 42). DP recurrence `C(n) = sum_{j=0}^{n-1} + C(j) * C(n-1-j)`. C(5) = 42 — also the count of distinct binary + trees with 5 internal nodes, balanced paren strings of length + 10, etc. Tests nested for-loop over Array with arr.(i) read + + write. 106 baseline programs total. - 2026-05-09 Phase 5.1 — fizz_classifier.ml baseline (FizzBuzz with polymorphic variants, 1..30 weighted score = 540). Two functions: classify maps i → ` `FizzBuzz | `Fizz | `Buzz | `Num n``, and score From c16a8f2d53bf9362609a7d80d81bacbe34c3f5f1 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 21:16:40 +0000 Subject: [PATCH 200/298] ocaml: phase 5.1 is_prime.ml baseline (count primes <= 100 = 25) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Trial division up to sqrt(n) with early-exit via bool ref: let is_prime n = if n < 2 then false else let p = ref true in let i = ref 2 in while !i * !i <= n && !p do if n mod !i = 0 then p := false; i := !i + 1 done; !p Outer count_primes loops 2..n calling is_prime, accumulating count. Returns 25 — the canonical prime-counting function pi(100). 107 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/is_prime.ml | 21 +++++++++++++++++++++ plans/ocaml-on-sx.md | 6 ++++++ 3 files changed, 28 insertions(+) create mode 100644 lib/ocaml/baseline/is_prime.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 7613a491..862e864c 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -46,6 +46,7 @@ "hanoi.ml": 1023, "hist.ml": 75, "int_sqrt.ml": 1027, + "is_prime.ml": 25, "fizz_classifier.ml": 540, "fizzbuzz.ml": 57, "flatten_tree.ml": 28, diff --git a/lib/ocaml/baseline/is_prime.ml b/lib/ocaml/baseline/is_prime.ml new file mode 100644 index 00000000..8e97fb0f --- /dev/null +++ b/lib/ocaml/baseline/is_prime.ml @@ -0,0 +1,21 @@ +let is_prime n = + if n < 2 then false + else + let p = ref true in + let i = ref 2 in + while !i * !i <= n && !p do + if n mod !i = 0 then p := false; + i := !i + 1 + done; + !p + +let count_primes n = + let c = ref 0 in + for i = 2 to n do + if is_prime i then c := !c + 1 + done; + !c + +;; + +count_primes 100 diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index e9809c3a..56749bed 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,12 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — is_prime.ml baseline (count primes ≤ 100 = + 25). Trial division up to √n with early-exit via bool ref. Loop + 2..n calling is_prime, accumulate count. Returns 25 (the canonical + prime-counting function π(100)). Tests two cooperating functions + + while-with-bool-short-circuit + nested for. 107 baseline + programs total. - 2026-05-09 Phase 5.1 — catalan.ml baseline (Catalan number C(5) via DP recurrence = 42). DP recurrence `C(n) = sum_{j=0}^{n-1} C(j) * C(n-1-j)`. C(5) = 42 — also the count of distinct binary From cecde8733ac6c8b1d1b7069d980433e9bb676f97 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 21:26:31 +0000 Subject: [PATCH 201/298] ocaml: phase 5.1 partition.ml baseline (stable partition, evens*100 + odds = 3025) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two ref lists accumulating in reverse, then List.rev'd — preserves original order: let partition pred xs = let yes = ref [] in let no = ref [] in List.iter (fun x -> if pred x then yes := x :: !yes else no := x :: !no ) xs; (List.rev !yes, List.rev !no) partition (fun x -> x mod 2 = 0) [1..10] -> ([2;4;6;8;10], [1;3;5;7;9]) evens sum * 100 + odds sum = 30 * 100 + 25 = 3025 Tests higher-order predicate, tuple return, and iter-98 let-tuple destructuring on the call site. 108 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/partition.ml | 13 +++++++++++++ plans/ocaml-on-sx.md | 7 +++++++ 3 files changed, 21 insertions(+) create mode 100644 lib/ocaml/baseline/partition.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 862e864c..95229766 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -74,6 +74,7 @@ "option_match.ml": 5, "palindrome.ml": 4, "paren_depth.ml": 7, + "partition.ml": 3025, "pancake_sort.ml": 910, "pascal.ml": 252, "peano.ml": 30, diff --git a/lib/ocaml/baseline/partition.ml b/lib/ocaml/baseline/partition.ml new file mode 100644 index 00000000..b1cf0bf6 --- /dev/null +++ b/lib/ocaml/baseline/partition.ml @@ -0,0 +1,13 @@ +let partition pred xs = + let yes = ref [] in + let no = ref [] in + List.iter (fun x -> + if pred x then yes := x :: !yes + else no := x :: !no + ) xs; + (List.rev !yes, List.rev !no) + +;; + +let (evens, odds) = partition (fun x -> x mod 2 = 0) [1;2;3;4;5;6;7;8;9;10] in +List.fold_left (+) 0 evens * 100 + List.fold_left (+) 0 odds diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 56749bed..5b89f459 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — partition.ml baseline (stable partition by + predicate, 30*100 + 25 = 3025). Two ref lists accumulating in + reverse, then List.rev'd — preserves original order. Test: + `partition (fun x -> x mod 2 = 0) [1..10]` → ([2;4;6;8;10], + [1;3;5;7;9]) → 30*100 + 25 = 3025. Tests higher-order predicate + + tuple return + iter-98 let-tuple destructuring. 108 baseline + programs total. - 2026-05-09 Phase 5.1 — is_prime.ml baseline (count primes ≤ 100 = 25). Trial division up to √n with early-exit via bool ref. Loop 2..n calling is_prime, accumulate count. Returns 25 (the canonical From 00ffba930661165274ad77ef7d9b4ad159a08feb Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 21:35:59 +0000 Subject: [PATCH 202/298] ocaml: phase 5.1 euler5.ml baseline (smallest multiple of 1..20 = 232792560) Iteratively takes lcm of running result with i: let rec gcd a b = if b = 0 then a else gcd b (a mod b) let lcm a b = a * b / gcd a b let euler5 n = let r = ref 1 in for i = 2 to n do r := lcm !r i done; !r euler5 20 = 232792560 = 2^4 * 3^2 * 5 * 7 * 11 * 13 * 17 * 19 Tests gcd_lcm composition (iter 140) on a fresh problem. 109 baseline programs total. --- lib/ocaml/baseline/euler5.ml | 11 +++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 6 ++++++ 3 files changed, 18 insertions(+) create mode 100644 lib/ocaml/baseline/euler5.ml diff --git a/lib/ocaml/baseline/euler5.ml b/lib/ocaml/baseline/euler5.ml new file mode 100644 index 00000000..949015a3 --- /dev/null +++ b/lib/ocaml/baseline/euler5.ml @@ -0,0 +1,11 @@ +let rec gcd a b = if b = 0 then a else gcd b (a mod b) +let lcm a b = a * b / gcd a b +let euler5 n = + let r = ref 1 in + for i = 2 to n do + r := lcm !r i + done; + !r +;; + +euler5 20 diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 95229766..99f9f0a7 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -27,6 +27,7 @@ "exception_user.ml": 26, "euler1.ml": 233168, "euler2.ml": 4613732, + "euler5.ml": 232792560, "euler6.ml": 25164150, "euler9.ml": 31875000, "expr_eval.ml": 16, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 5b89f459..d523a6a1 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,12 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — euler5.ml baseline (Project Euler #5, + smallest number divisible by all 1..20 = 232792560). Iteratively + takes lcm of running result with i for i=2..n; lcm via gcd via + Euclidean. 232792560 = 2^4 * 3^2 * 5 * 7 * 11 * 13 * 17 * 19. + Tests two-line gcd/lcm + for-loop accumulator pattern. 109 + baseline programs total. - 2026-05-09 Phase 5.1 — partition.ml baseline (stable partition by predicate, 30*100 + 25 = 3025). Two ref lists accumulating in reverse, then List.rev'd — preserves original order. Test: From 853504642f93d75f3704393419d22e62dfe2eea0 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 21:46:16 +0000 Subject: [PATCH 203/298] ocaml: phase 5.1 euler10.ml baseline (sum of primes <= 100 = 1060, scaled-down PE10) Sieve of Eratosthenes followed by a sum loop: let sieve_sum n = let s = Array.make (n + 1) true in s.(0) <- false; s.(1) <- false; for i = 2 to n do if s.(i) then begin let j = ref (i * i) in while !j <= n do s.(!j) <- false; j := !j + i done end done; let total = ref 0 in for i = 2 to n do if s.(i) then total := !total + i done; !total Real PE10 asks for sum below 2,000,000; that's a ~2-3 second loop in native OCaml but minutes-to-hours under our contended-host spec-level evaluator. 100 keeps the run under 3 minutes while still exercising the same algorithm. 110 baseline programs total. --- lib/ocaml/baseline/euler10.ml | 22 ++++++++++++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 4 ++++ 3 files changed, 27 insertions(+) create mode 100644 lib/ocaml/baseline/euler10.ml diff --git a/lib/ocaml/baseline/euler10.ml b/lib/ocaml/baseline/euler10.ml new file mode 100644 index 00000000..72b16caa --- /dev/null +++ b/lib/ocaml/baseline/euler10.ml @@ -0,0 +1,22 @@ +let sieve_sum n = + let s = Array.make (n + 1) true in + s.(0) <- false; + s.(1) <- false; + for i = 2 to n do + if s.(i) then begin + let j = ref (i * i) in + while !j <= n do + s.(!j) <- false; + j := !j + i + done + end + done; + let total = ref 0 in + for i = 2 to n do + if s.(i) then total := !total + i + done; + !total + +;; + +sieve_sum 100 diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 99f9f0a7..0bb688fd 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -26,6 +26,7 @@ "exception_handle.ml": 4, "exception_user.ml": 26, "euler1.ml": 233168, + "euler10.ml": 1060, "euler2.ml": 4613732, "euler5.ml": 232792560, "euler6.ml": 25164150, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index d523a6a1..2dddfaff 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,10 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — euler10.ml baseline (sum of primes ≤ 100 = + 1060, scaled-down Project Euler #10). Sieve of Eratosthenes + followed by a sum loop. Used 100 instead of 2 million to fit our + contended host's runtime budget. 110 baseline programs total. - 2026-05-09 Phase 5.1 — euler5.ml baseline (Project Euler #5, smallest number divisible by all 1..20 = 232792560). Iteratively takes lcm of running result with i for i=2..n; lcm via gcd via From 533be5b36befa35c30a4ab1434eae3c6add98d64 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 21:59:23 +0000 Subject: [PATCH 204/298] ocaml: phase 5.1 euler4_small.ml baseline (largest 2-digit palindrome product = 9009) Scaled-down Project Euler #4. Real version uses 3-digit numbers yielding 906609 = 913 * 993; that's an 810k-iteration nested loop that times out under our contended-host spec-level evaluator. The 2-digit version (10..99) is fast enough and tests the same algorithm: 9009 = 91 * 99 (the only 2-digit-product palindrome > 9000) Implementation: is_pal n index-walk comparing s.[i] to s.[len-1-i] euler4 lo hi nested for with running max + early-skip via 'p > !m && is_pal p' short-circuit 111 baseline programs total. --- lib/ocaml/baseline/euler4_small.ml | 21 +++++++++++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 5 +++++ 3 files changed, 27 insertions(+) create mode 100644 lib/ocaml/baseline/euler4_small.ml diff --git a/lib/ocaml/baseline/euler4_small.ml b/lib/ocaml/baseline/euler4_small.ml new file mode 100644 index 00000000..13a81b9a --- /dev/null +++ b/lib/ocaml/baseline/euler4_small.ml @@ -0,0 +1,21 @@ +let is_pal n = + let s = string_of_int n in + let len = String.length s in + let p = ref true in + for i = 0 to len / 2 - 1 do + if s.[i] <> s.[len - 1 - i] then p := false + done; + !p + +let euler4 lo hi = + let m = ref 0 in + for a = lo to hi do + for b = a to hi do + let p = a * b in + if p > !m && is_pal p then m := p + done + done; + !m +;; + +euler4 10 99 diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 0bb688fd..e9824aa4 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -28,6 +28,7 @@ "euler1.ml": 233168, "euler10.ml": 1060, "euler2.ml": 4613732, + "euler4_small.ml": 9009, "euler5.ml": 232792560, "euler6.ml": 25164150, "euler9.ml": 31875000, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 2dddfaff..5c1d1f87 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,11 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — euler4_small.ml baseline (largest 2-digit + palindrome product = 9009 = 91 * 99). Scaled-down Project Euler + #4 (real version uses 3-digit numbers, 906609; that's 810k inner + iterations and would time out under contention). Tests palindrome + predicate via index-walk + nested for. 111 baseline programs total. - 2026-05-09 Phase 5.1 — euler10.ml baseline (sum of primes ≤ 100 = 1060, scaled-down Project Euler #10). Sieve of Eratosthenes followed by a sum loop. Used 100 instead of 2 million to fit our From 2a01758f28c37a3c74b796b0189ff6398c48587d Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 22:11:11 +0000 Subject: [PATCH 205/298] ocaml: phase 5.1 euler7.ml baseline (100th prime = 541) Scaled-down PE7 (real version asks for the 10001st prime = 104743). Trial-division within an outer while loop searching forward from 2, short-circuited via bool ref: let nth_prime n = let count = ref 0 in let i = ref 1 in let result = ref 0 in while !count < n do i := !i + 1; let p = ref true in let j = ref 2 in while !j * !j <= !i && !p do if !i mod !j = 0 then p := false; j := !j + 1 done; if !p then begin count := !count + 1; if !count = n then result := !i end done; !result nth_prime 100 = 541 100 keeps the run under our 3-minute budget while exercising the same algorithm. 112 baseline programs total. --- lib/ocaml/baseline/euler7.ml | 22 ++++++++++++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 5 +++++ 3 files changed, 28 insertions(+) create mode 100644 lib/ocaml/baseline/euler7.ml diff --git a/lib/ocaml/baseline/euler7.ml b/lib/ocaml/baseline/euler7.ml new file mode 100644 index 00000000..e4ad4b1e --- /dev/null +++ b/lib/ocaml/baseline/euler7.ml @@ -0,0 +1,22 @@ +let nth_prime n = + let count = ref 0 in + let i = ref 1 in + let result = ref 0 in + while !count < n do + i := !i + 1; + let p = ref true in + let j = ref 2 in + while !j * !j <= !i && !p do + if !i mod !j = 0 then p := false; + j := !j + 1 + done; + if !p then begin + count := !count + 1; + if !count = n then result := !i + end + done; + !result + +;; + +nth_prime 100 diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index e9824aa4..1db73d94 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -31,6 +31,7 @@ "euler4_small.ml": 9009, "euler5.ml": 232792560, "euler6.ml": 25164150, + "euler7.ml": 541, "euler9.ml": 31875000, "expr_eval.ml": 16, "expr_simp.ml": 22, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 5c1d1f87..1f39a248 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,11 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — euler7.ml baseline (100th prime = 541; + scaled-down PE7 which asks for the 10001st = 104743). Trial- + division within an outer while loop searching forward from 2, + short-circuited via bool ref. 100 keeps under 3-min budget while + exercising the same algorithm. 112 baseline programs total. - 2026-05-09 Phase 5.1 — euler4_small.ml baseline (largest 2-digit palindrome product = 9009 = 91 * 99). Scaled-down Project Euler #4 (real version uses 3-digit numbers, 906609; that's 810k inner From 320d78a9935006858b1f13261b1b14aa212ac086 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 22:21:16 +0000 Subject: [PATCH 206/298] ocaml: phase 5.1 euler3.ml baseline (largest prime factor of 13195 = 29) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PE3's worked example. Trial-division streaming: when the current factor divides m, divide and update largest; otherwise bump factor: let largest_prime_factor n = let m = ref n in let factor = ref 2 in let largest = ref 0 in while !m > 1 do if !m mod !factor = 0 then begin largest := !factor; m := !m / !factor end else factor := !factor + 1 done; !largest largest_prime_factor 13195 = 29 (= 5 * 7 * 13 * 29) The full PE3 number 600851475143 exceeds JS safe-int (2^53 ≈ 9e15 in float terms; 6e11 is fine but the intermediate 'i mod !factor' on the way to 6857 can overflow precision). 13195 keeps the program portable across hosts. 113 baseline programs total. --- lib/ocaml/baseline/euler3.ml | 15 +++++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 6 ++++++ 3 files changed, 22 insertions(+) create mode 100644 lib/ocaml/baseline/euler3.ml diff --git a/lib/ocaml/baseline/euler3.ml b/lib/ocaml/baseline/euler3.ml new file mode 100644 index 00000000..e8b5c730 --- /dev/null +++ b/lib/ocaml/baseline/euler3.ml @@ -0,0 +1,15 @@ +let largest_prime_factor n = + let m = ref n in + let factor = ref 2 in + let largest = ref 0 in + while !m > 1 do + if !m mod !factor = 0 then begin + largest := !factor; + m := !m / !factor + end else factor := !factor + 1 + done; + !largest + +;; + +largest_prime_factor 13195 diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 1db73d94..1ffa7432 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -28,6 +28,7 @@ "euler1.ml": 233168, "euler10.ml": 1060, "euler2.ml": 4613732, + "euler3.ml": 29, "euler4_small.ml": 9009, "euler5.ml": 232792560, "euler6.ml": 25164150, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 1f39a248..5b103d9c 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,12 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — euler3.ml baseline (largest prime factor of + 13195 = 29; PE3's worked example). Trial-division streaming: when + the current factor divides m, divide and update largest; otherwise + bump factor. Numbers like 600851475143 (the actual PE3) exceed JS + safe-int (2^53 ≈ 9e15), so 13195 keeps the program portable. 113 + baseline programs total. - 2026-05-09 Phase 5.1 — euler7.ml baseline (100th prime = 541; scaled-down PE7 which asks for the 10001st = 104743). Trial- division within an outer while loop searching forward from 2, From 5959989324ecdaa869e9184d692c826fee039c3b Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 22:31:27 +0000 Subject: [PATCH 207/298] ocaml: phase 5.1 euler25.ml baseline (first 12-digit Fibonacci index = 55) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Iteratively grows two refs while the larger is below 10^(n-1), counting iterations: let euler25 n = let a = ref 1 in let b = ref 1 in let i = ref 2 in let target = ref 1 in for _ = 1 to n - 1 do target := !target * 10 done; while !b < !target do let c = !a + !b in a := !b; b := c; i := !i + 1 done; !i euler25 12 = 55 (F(55) = 139_583_862_445, 12 digits) Real PE25 asks for 1000 digits (answer 4782); 12 keeps within safe-int while exercising the identical algorithm. 114 baseline programs total — 200 iterations landed. --- lib/ocaml/baseline/euler25.ml | 17 +++++++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 6 ++++++ 3 files changed, 24 insertions(+) create mode 100644 lib/ocaml/baseline/euler25.ml diff --git a/lib/ocaml/baseline/euler25.ml b/lib/ocaml/baseline/euler25.ml new file mode 100644 index 00000000..67d3d0a5 --- /dev/null +++ b/lib/ocaml/baseline/euler25.ml @@ -0,0 +1,17 @@ +let euler25 n = + let a = ref 1 in + let b = ref 1 in + let i = ref 2 in + let target = ref 1 in + for _ = 1 to n - 1 do target := !target * 10 done; + while !b < !target do + let c = !a + !b in + a := !b; + b := c; + i := !i + 1 + done; + !i + +;; + +euler25 12 diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 1ffa7432..2b29b303 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -28,6 +28,7 @@ "euler1.ml": 233168, "euler10.ml": 1060, "euler2.ml": 4613732, + "euler25.ml": 55, "euler3.ml": 29, "euler4_small.ml": 9009, "euler5.ml": 232792560, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 5b103d9c..ce54df19 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,12 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — euler25.ml baseline (first Fibonacci index + with 12 digits = 55). Iteratively grows two refs while the larger + is below `10^(n-1)`, counting iterations. Real PE25 asks for 1000 + digits (= 4782); 12 keeps within safe-int while exercising the + identical algorithm. 114 baseline programs total — and 200 + iterations landed. - 2026-05-09 Phase 5.1 — euler3.ml baseline (largest prime factor of 13195 = 29; PE3's worked example). Trial-division streaming: when the current factor divides m, divide and update largest; otherwise From 391a2d0c4fb33c05878e6e5b1e2d7f2335b29a7e Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 22:43:45 +0000 Subject: [PATCH 208/298] ocaml: phase 5.1 euler16.ml baseline (digit sum of 2^15 = 26) Computes 2^n via for-loop multiplication, then walks the digits via mod 10 / div 10: let euler16 n = let p = ref 1 in for _ = 1 to n do p := !p * 2 done; let sum = ref 0 in let m = ref !p in while !m > 0 do sum := !sum + !m mod 10; m := !m / 10 done; !sum euler16 15 = 3 + 2 + 7 + 6 + 8 = 26 (= digit sum of 32768) Real PE16 asks for 2^1000 which exceeds float precision; 2^15 stays safe and exercises the same digit-decomposition pattern. 115 baseline programs total. --- lib/ocaml/baseline/euler16.ml | 14 ++++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 5 +++++ 3 files changed, 20 insertions(+) create mode 100644 lib/ocaml/baseline/euler16.ml diff --git a/lib/ocaml/baseline/euler16.ml b/lib/ocaml/baseline/euler16.ml new file mode 100644 index 00000000..55e98631 --- /dev/null +++ b/lib/ocaml/baseline/euler16.ml @@ -0,0 +1,14 @@ +let euler16 n = + let p = ref 1 in + for _ = 1 to n do p := !p * 2 done; + let sum = ref 0 in + let m = ref !p in + while !m > 0 do + sum := !sum + !m mod 10; + m := !m / 10 + done; + !sum + +;; + +euler16 15 diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 2b29b303..c228f1b7 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -26,6 +26,7 @@ "exception_handle.ml": 4, "exception_user.ml": 26, "euler1.ml": 233168, + "euler16.ml": 26, "euler10.ml": 1060, "euler2.ml": 4613732, "euler25.ml": 55, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index ce54df19..547cc29c 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,11 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — euler16.ml baseline (digit sum of 2^15 = + 26). Computes 2^n via for-loop accumulation, then walks the digits + via mod 10 / div 10 to sum them. Real PE16 asks for 2^1000 which + exceeds float precision; 2^15 = 32768 stays safe and exercises the + same digit-decomposition pattern. 115 baseline programs total. - 2026-05-09 Phase 5.1 — euler25.ml baseline (first Fibonacci index with 12 digits = 55). Iteratively grows two refs while the larger is below `10^(n-1)`, counting iterations. Real PE25 asks for 1000 From 89a807a1ed0840a8308ec7556657a63322e528f4 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 22:53:27 +0000 Subject: [PATCH 209/298] ocaml: phase 5.1 euler14.ml baseline (longest Collatz under 100, starting n = 97) collatz_len walks n through n/2 if even, 3n+1 if odd, counting steps. Outer loop scans 2..N tracking the best length and arg-best: let euler14 limit = let best = ref 0 in let best_n = ref 0 in for n = 2 to limit do let l = collatz_len n in if l > !best then begin best := l; best_n := n end done; !best_n euler14 100 = 97 (97 generates a 118-step chain) Real PE14 uses limit = 1_000_000 (answer 837799); 100 exercises the same algorithm in <2 minutes on our contended host. 116 baseline programs total. --- lib/ocaml/baseline/euler14.ml | 25 +++++++++++++++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 7 +++++++ 3 files changed, 33 insertions(+) create mode 100644 lib/ocaml/baseline/euler14.ml diff --git a/lib/ocaml/baseline/euler14.ml b/lib/ocaml/baseline/euler14.ml new file mode 100644 index 00000000..7e7e908e --- /dev/null +++ b/lib/ocaml/baseline/euler14.ml @@ -0,0 +1,25 @@ +let collatz_len n = + let m = ref n in + let c = ref 0 in + while !m > 1 do + if !m mod 2 = 0 then m := !m / 2 + else m := 3 * !m + 1; + c := !c + 1 + done; + !c + +let euler14 limit = + let best = ref 0 in + let best_n = ref 0 in + for n = 2 to limit do + let l = collatz_len n in + if l > !best then begin + best := l; + best_n := n + end + done; + !best_n + +;; + +euler14 100 diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index c228f1b7..587883cb 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -28,6 +28,7 @@ "euler1.ml": 233168, "euler16.ml": 26, "euler10.ml": 1060, + "euler14.ml": 97, "euler2.ml": 4613732, "euler25.ml": 55, "euler3.ml": 29, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 547cc29c..a0cba117 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — euler14.ml baseline (longest Collatz chain + starting under N=100 → 97). collatz_len walks n through the + iteration n/2 if even, 3n+1 if odd, counting steps. Outer loop + scans 2..N tracking best length and arg-best. n=97 generates the + 118-step chain. Real PE14 uses N=1,000,000 (answer 837799); N=100 + exercises the same algorithm in <2 minutes. 116 baseline programs + total. - 2026-05-09 Phase 5.1 — euler16.ml baseline (digit sum of 2^15 = 26). Computes 2^n via for-loop accumulation, then walks the digits via mod 10 / div 10 to sum them. Real PE16 asks for 2^1000 which From ea7120751dd6ac742b51d1ebc634f009e28687c3 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 23:03:06 +0000 Subject: [PATCH 210/298] ocaml: phase 5.1 euler28.ml baseline (sum of diagonals in 7x7 spiral = 261) For each layer 1..(n-1)/2, the four corners of an Ulam spiral are spaced 2*layer apart. Step k four times per layer, accumulate: let euler28 n = let s = ref 1 in let k = ref 1 in for layer = 1 to (n - 1) / 2 do let step = 2 * layer in for _ = 1 to 4 do k := !k + step; s := !s + !k done done; !s euler28 7 = 1 + (3+5+7+9) + (13+17+21+25) + (31+37+43+49) = 261 Real PE28 uses 1001x1001 (answer 669171001); 7x7 is fast. 117 baseline programs total. --- lib/ocaml/baseline/euler28.ml | 15 +++++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 6 ++++++ 3 files changed, 22 insertions(+) create mode 100644 lib/ocaml/baseline/euler28.ml diff --git a/lib/ocaml/baseline/euler28.ml b/lib/ocaml/baseline/euler28.ml new file mode 100644 index 00000000..42ebf770 --- /dev/null +++ b/lib/ocaml/baseline/euler28.ml @@ -0,0 +1,15 @@ +let euler28 n = + let s = ref 1 in + let k = ref 1 in + for layer = 1 to (n - 1) / 2 do + let step = 2 * layer in + for _ = 1 to 4 do + k := !k + step; + s := !s + !k + done + done; + !s + +;; + +euler28 7 diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 587883cb..350f927a 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -31,6 +31,7 @@ "euler14.ml": 97, "euler2.ml": 4613732, "euler25.ml": 55, + "euler28.ml": 261, "euler3.ml": 29, "euler4_small.ml": 9009, "euler5.ml": 232792560, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index a0cba117..febf789e 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,12 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — euler28.ml baseline (sum of diagonals in + 7x7 number spiral = 261). For each layer (1..(n-1)/2) the four + corners are spaced 2*layer apart, so we step `k` four times per + layer and accumulate into s. Real PE28 uses 1001x1001 (answer + 669171001); 7x7 is fast enough to land in seconds. 117 baseline + programs total. - 2026-05-09 Phase 5.1 — euler14.ml baseline (longest Collatz chain starting under N=100 → 97). collatz_len walks n through the iteration n/2 if even, 3n+1 if odd, counting steps. Outer loop From 46e49dc947898421e9767aa88dbb977f306980da Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 23:17:00 +0000 Subject: [PATCH 211/298] ocaml: phase 5.1 euler30_cube.ml baseline (sum of digit-cube narcissistic numbers <= 999 = 1301) Numbers equal to the sum of cubes of their digits: 153 = 1 + 125 + 27 370 = 27 + 343 + 0 371 = 27 + 343 + 1 407 = 64 + 0 + 343 sum = 1301 Implementation: pow_digit_sum n p walk digits of n, accumulate d^p euler30 p limit scan 2..limit and sum where pow_digit_sum n p = n Real PE30 uses 5th powers (answer 443839); the cube version exercises the same algorithm in a smaller search space. 118 baseline programs total. --- lib/ocaml/baseline/euler30_cube.ml | 22 ++++++++++++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 5 +++++ 3 files changed, 28 insertions(+) create mode 100644 lib/ocaml/baseline/euler30_cube.ml diff --git a/lib/ocaml/baseline/euler30_cube.ml b/lib/ocaml/baseline/euler30_cube.ml new file mode 100644 index 00000000..25b40687 --- /dev/null +++ b/lib/ocaml/baseline/euler30_cube.ml @@ -0,0 +1,22 @@ +let pow_digit_sum n p = + let m = ref n in + let s = ref 0 in + while !m > 0 do + let d = !m mod 10 in + let pd = ref 1 in + for _ = 1 to p do pd := !pd * d done; + s := !s + !pd; + m := !m / 10 + done; + !s + +let euler30 p limit = + let total = ref 0 in + for n = 2 to limit do + if pow_digit_sum n p = n then total := !total + n + done; + !total + +;; + +euler30 3 999 diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 350f927a..5f65b1ab 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -32,6 +32,7 @@ "euler2.ml": 4613732, "euler25.ml": 55, "euler28.ml": 261, + "euler30_cube.ml": 1301, "euler3.ml": 29, "euler4_small.ml": 9009, "euler5.ml": 232792560, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index febf789e..9aed8e0b 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,11 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — euler30_cube.ml baseline (sum of numbers + equal to sum of cubes of their digits, ≤999 = 1301). The full + numbers are 153 + 370 + 371 + 407 = 1301. PE30 proper uses 5th + powers (answer 443839); cube version exercises the same algorithm + in a smaller search space. 118 baseline programs total. - 2026-05-09 Phase 5.1 — euler28.ml baseline (sum of diagonals in 7x7 number spiral = 261). For each layer (1..(n-1)/2) the four corners are spaced 2*layer apart, so we step `k` four times per From 87f9a84365b2824ce481978edf55b30a59915d49 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 23:27:15 +0000 Subject: [PATCH 212/298] ocaml: phase 5.1 euler21_small.ml baseline (sum of amicable numbers <= 300 = 504) div_sum computes proper divisor sum via trial division up to sqrt(n): let div_sum n = let s = ref 1 in let i = ref 2 in while !i * !i <= n do if n mod !i = 0 then begin s := !s + !i; let q = n / !i in if q <> !i then s := !s + q end; i := !i + 1 done; if n = 1 then 0 else !s Outer loop finds amicable pairs (a, b) with d(a) = b, d(b) = a, a != b. Only pair under 300 is (220, 284); 220 + 284 = 504. Real PE21 uses 10000 (answer 31626). 300 keeps the run under budget while exercising the same divisor-sum trick. 119 baseline programs total. --- lib/ocaml/baseline/euler21_small.ml | 25 +++++++++++++++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 5 +++++ 3 files changed, 31 insertions(+) create mode 100644 lib/ocaml/baseline/euler21_small.ml diff --git a/lib/ocaml/baseline/euler21_small.ml b/lib/ocaml/baseline/euler21_small.ml new file mode 100644 index 00000000..45e64e98 --- /dev/null +++ b/lib/ocaml/baseline/euler21_small.ml @@ -0,0 +1,25 @@ +let div_sum n = + let s = ref 1 in + let i = ref 2 in + while !i * !i <= n do + if n mod !i = 0 then begin + s := !s + !i; + let q = n / !i in + if q <> !i then s := !s + q + end; + i := !i + 1 + done; + if n = 1 then 0 else !s + +let euler21 limit = + let total = ref 0 in + for a = 2 to limit do + let b = div_sum a in + if b <> a && b > a && b <= limit && div_sum b = a then + total := !total + a + b + done; + !total + +;; + +euler21 300 diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 5f65b1ab..55e60ad7 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -30,6 +30,7 @@ "euler10.ml": 1060, "euler14.ml": 97, "euler2.ml": 4613732, + "euler21_small.ml": 504, "euler25.ml": 55, "euler28.ml": 261, "euler30_cube.ml": 1301, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 9aed8e0b..73af64c8 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,11 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — euler21_small.ml baseline (sum of amicable + numbers ≤ 300 = 504). div_sum computes proper divisor sum via + trial division up to √n; outer loop finds amicable pairs (a, b) + with d(a) = b, d(b) = a, a ≠ b. Only pair under 300 is (220, 284). + Real PE21 uses 10000 (answer 31626). 119 baseline programs total. - 2026-05-09 Phase 5.1 — euler30_cube.ml baseline (sum of numbers equal to sum of cubes of their digits, ≤999 = 1301). The full numbers are 153 + 370 + 371 + 407 = 1301. PE30 proper uses 5th From 21dbd195d5f2020507f6faad0c06ec9b9392458a Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 23:37:36 +0000 Subject: [PATCH 213/298] ocaml: phase 5.1 euler29_small.ml baseline (distinct a^b for 2<=a,b<=5 = 15) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compute every power a^b for a, b in [2..N] and count distinct values. Hashtbl as a set with unit-payload (iter-168 idiom): let euler29 n = let h = Hashtbl.create 64 in for a = 2 to n do for b = 2 to n do let p = ref 1 in for _ = 1 to b do p := !p * a done; Hashtbl.replace h !p () done done; Hashtbl.length h For N=5: 16 powers minus one duplicate (4^2 = 2^4 = 16) -> 15. Real PE29 uses N=100 (answer 9183). 120 baseline programs total — milestone. --- lib/ocaml/baseline/euler29_small.ml | 14 ++++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 5 +++++ 3 files changed, 20 insertions(+) create mode 100644 lib/ocaml/baseline/euler29_small.ml diff --git a/lib/ocaml/baseline/euler29_small.ml b/lib/ocaml/baseline/euler29_small.ml new file mode 100644 index 00000000..602cf7cb --- /dev/null +++ b/lib/ocaml/baseline/euler29_small.ml @@ -0,0 +1,14 @@ +let euler29 n = + let h = Hashtbl.create 64 in + for a = 2 to n do + for b = 2 to n do + let p = ref 1 in + for _ = 1 to b do p := !p * a done; + Hashtbl.replace h !p () + done + done; + Hashtbl.length h + +;; + +euler29 5 diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 55e60ad7..5255b53d 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -33,6 +33,7 @@ "euler21_small.ml": 504, "euler25.ml": 55, "euler28.ml": 261, + "euler29_small.ml": 15, "euler30_cube.ml": 1301, "euler3.ml": 29, "euler4_small.ml": 9009, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 73af64c8..afec331a 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,11 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — euler29_small.ml baseline (distinct a^b for + 2≤a,b≤5 = 15). 16 powers minus the one duplicate (4^2 = 2^4 = 16) + → 15 distinct values. Hashtbl as a set with unit-payload (the + iter-168 idiom). Real PE29 uses N=100 (answer 9183). 120 baseline + programs total — milestone. - 2026-05-09 Phase 5.1 — euler21_small.ml baseline (sum of amicable numbers ≤ 300 = 504). div_sum computes proper divisor sum via trial division up to √n; outer loop finds amicable pairs (a, b) From 4e6a3453421dedc4e125b72ad80455e326f5af85 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 23:50:25 +0000 Subject: [PATCH 214/298] ocaml: phase 5.1 euler34_small.ml baseline (factorions <= 2000 = 145) Number that equals the sum of factorials of its digits: 145 = 1! + 4! + 5! = 1 + 24 + 120 Implementation: fact n iterative factorial digit_fact_sum n walk digits, sum fact(digit) euler34 limit scan 3..limit, accumulate matches The only other factorion is 40585 = 4!+0!+5!+8!+5!. Real PE34 sums both (= 40730); 2000 keeps under our search budget. 121 baseline programs total. --- lib/ocaml/baseline/euler34_small.ml | 24 ++++++++++++++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 5 +++++ 3 files changed, 30 insertions(+) create mode 100644 lib/ocaml/baseline/euler34_small.ml diff --git a/lib/ocaml/baseline/euler34_small.ml b/lib/ocaml/baseline/euler34_small.ml new file mode 100644 index 00000000..b8725b9a --- /dev/null +++ b/lib/ocaml/baseline/euler34_small.ml @@ -0,0 +1,24 @@ +let fact n = + let r = ref 1 in + for i = 2 to n do r := !r * i done; + !r + +let digit_fact_sum n = + let m = ref n in + let s = ref 0 in + while !m > 0 do + s := !s + fact (!m mod 10); + m := !m / 10 + done; + !s + +let euler34 limit = + let total = ref 0 in + for n = 3 to limit do + if digit_fact_sum n = n then total := !total + n + done; + !total + +;; + +euler34 2000 diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 5255b53d..d0b59c2f 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -35,6 +35,7 @@ "euler28.ml": 261, "euler29_small.ml": 15, "euler30_cube.ml": 1301, + "euler34_small.ml": 145, "euler3.ml": 29, "euler4_small.ml": 9009, "euler5.ml": 232792560, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index afec331a..01a85248 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,11 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 5.1 — euler34_small.ml baseline (numbers equal + to sum of factorials of digits, ≤2000 = 145). 145 = 1!+4!+5! = + 1+24+120. The other "factorion" 40585 is the only number above + this threshold; PE34 sums both = 40730. 121 baseline programs + total. - 2026-05-09 Phase 5.1 — euler29_small.ml baseline (distinct a^b for 2≤a,b≤5 = 15). 16 powers minus the one duplicate (4^2 = 2^4 = 16) → 15 distinct values. Hashtbl as a set with unit-payload (the From 37f7405dcfe4dcfb45a7fd8d80eeb16380b04e3a Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 00:09:57 +0000 Subject: [PATCH 215/298] ocaml: phase 5.1 euler40_small.ml baseline (Champernowne digit product = 15) Build the Champernowne string '12345678910111213...' until at least 1500 chars; product of digits at positions 1, 10, 100, 1000 is 1 * 1 * 5 * 3 = 15. Initial implementation timed out: 'String.length (Buffer.contents buf) < 1500' rebuilt the full string each iteration (O(n^2) in our spec-level evaluator). Fixed by tracking length separately from the Buffer: let len = ref 0 in while !len < 1500 do let s = string_of_int !i in Buffer.add_string buf s; len := !len + String.length s; i := !i + 1 done Real PE40 uses positions up to 10^6 (answer 210); 1000 keeps under budget while exercising the same string-build + char-pick pattern. 122 baseline programs total. --- lib/ocaml/baseline/euler40_small.ml | 22 ++++++++++++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 6 ++++++ 3 files changed, 29 insertions(+) create mode 100644 lib/ocaml/baseline/euler40_small.ml diff --git a/lib/ocaml/baseline/euler40_small.ml b/lib/ocaml/baseline/euler40_small.ml new file mode 100644 index 00000000..7905f46e --- /dev/null +++ b/lib/ocaml/baseline/euler40_small.ml @@ -0,0 +1,22 @@ +let euler40 () = + let buf = Buffer.create 4096 in + let len = ref 0 in + let i = ref 1 in + while !len < 1500 do + let s = string_of_int !i in + Buffer.add_string buf s; + len := !len + String.length s; + i := !i + 1 + done; + let s = Buffer.contents buf in + let prod = ref 1 in + let positions = [1; 10; 100; 1000] in + List.iter (fun p -> + let c = s.[p - 1] in + prod := !prod * (Char.code c - Char.code '0') + ) positions; + !prod + +;; + +euler40 () diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index d0b59c2f..2b64e4bb 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -36,6 +36,7 @@ "euler29_small.ml": 15, "euler30_cube.ml": 1301, "euler34_small.ml": 145, + "euler40_small.ml": 15, "euler3.ml": 29, "euler4_small.ml": 9009, "euler5.ml": 232792560, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 01a85248..248b321f 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,12 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — euler40_small.ml baseline (Champernowne + digit-product at 1, 10, 100, 1000 = 1*1*5*3 = 15). Builds the + Champernowne string until ≥1500 chars; tracks length separately + from the Buffer to avoid O(n²) `String.length (Buffer.contents + buf)` reallocation. Real PE40 uses positions up to 10^6 (answer + 210). 122 baseline programs total. - 2026-05-09 Phase 5.1 — euler34_small.ml baseline (numbers equal to sum of factorials of digits, ≤2000 = 145). 145 = 1!+4!+5! = 1+24+120. The other "factorion" 40585 is the only number above From ed8aaf8af7f61255550ef061770803ec4a8dce45 Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 00:26:46 +0000 Subject: [PATCH 216/298] ocaml: phase 5.1 euler36.ml baseline (sum of double-base palindromes <= 1000 = 1772) Numbers that read the same in base 10 and base 2: 1, 3, 5, 7, 9, 33, 99, 313, 585, 717 sum = 1772 Implementation: pal_dec n check decimal palindrome via index walk to_binary n build binary string via mod 2 / div 2 stack pal_bin n check binary palindrome euler36 limit scan 1..limit-1, sum where both palindromes Real PE36 uses 10^6 (answer 872187). 1000 takes ~9 minutes on contended host but stays within reasonable budget for the spec-level evaluator. 123 baseline programs total. --- lib/ocaml/baseline/euler36.ml | 41 ++++++++++++++++++++++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 6 +++++ 3 files changed, 48 insertions(+) create mode 100644 lib/ocaml/baseline/euler36.ml diff --git a/lib/ocaml/baseline/euler36.ml b/lib/ocaml/baseline/euler36.ml new file mode 100644 index 00000000..e1c81c19 --- /dev/null +++ b/lib/ocaml/baseline/euler36.ml @@ -0,0 +1,41 @@ +let pal_dec n = + let s = string_of_int n in + let len = String.length s in + let p = ref true in + for i = 0 to len / 2 - 1 do + if s.[i] <> s.[len - 1 - i] then p := false + done; + !p + +let to_binary n = + if n = 0 then "0" + else + let buf = Buffer.create 32 in + let m = ref n in + let stack = ref [] in + while !m > 0 do + stack := (!m mod 2) :: !stack; + m := !m / 2 + done; + List.iter (fun d -> Buffer.add_string buf (string_of_int d)) !stack; + Buffer.contents buf + +let pal_bin n = + let s = to_binary n in + let len = String.length s in + let p = ref true in + for i = 0 to len / 2 - 1 do + if s.[i] <> s.[len - 1 - i] then p := false + done; + !p + +let euler36 limit = + let sum = ref 0 in + for n = 1 to limit - 1 do + if pal_dec n && pal_bin n then sum := !sum + n + done; + !sum + +;; + +euler36 1000 diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 2b64e4bb..d995d935 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -36,6 +36,7 @@ "euler29_small.ml": 15, "euler30_cube.ml": 1301, "euler34_small.ml": 145, + "euler36.ml": 1772, "euler40_small.ml": 15, "euler3.ml": 29, "euler4_small.ml": 9009, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 248b321f..f779c07c 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,12 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — euler36.ml baseline (sum of double-base + palindromes ≤ 1000 = 1772). Numbers that read the same in base 10 + and base 2: 1, 3, 5, 7, 9, 33, 99, 313, 585, 717. Sum = 1772. + Real PE36 uses 10^6 (answer 872187); 1000 takes ~9 minutes on + contended host but fits within 480s timeout * inner-iteration + cost ratio. 123 baseline programs total. - 2026-05-10 Phase 5.1 — euler40_small.ml baseline (Champernowne digit-product at 1, 10, 100, 1000 = 1*1*5*3 = 15). Builds the Champernowne string until ≥1500 chars; tracks length separately From da96a79104d844d37885640c0d18feb00b0c72ef Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 00:36:29 +0000 Subject: [PATCH 217/298] ocaml: phase 5.1 abundant.ml baseline (count abundant numbers < 100 = 21) A number n is abundant if its proper-divisor sum exceeds n. Reuses the trial-division div_sum helper: let count_abundant n = let c = ref 0 in for i = 12 to n - 1 do if div_sum i > i then c := !c + 1 done; !c count_abundant 100 = 21 Abundant numbers under 100, starting at 12, 18, 20, 24, 30, 36, 40, 42, 48, 54, 56, 60, 66, 70, 72, 78, 80, 84, 88, 90, 96 -> 21. Companion to euler21_small.ml (amicable). The classification: perfect: d(n) = n (e.g. 6, 28) abundant: d(n) > n (e.g. 12, 18) deficient:d(n) < n (everything else) 124 baseline programs total. --- lib/ocaml/baseline/abundant.ml | 23 +++++++++++++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 5 +++++ 3 files changed, 29 insertions(+) create mode 100644 lib/ocaml/baseline/abundant.ml diff --git a/lib/ocaml/baseline/abundant.ml b/lib/ocaml/baseline/abundant.ml new file mode 100644 index 00000000..c5b17781 --- /dev/null +++ b/lib/ocaml/baseline/abundant.ml @@ -0,0 +1,23 @@ +let div_sum n = + let s = ref 1 in + let i = ref 2 in + while !i * !i <= n do + if n mod !i = 0 then begin + s := !s + !i; + let q = n / !i in + if q <> !i then s := !s + q + end; + i := !i + 1 + done; + if n = 1 then 0 else !s + +let count_abundant n = + let c = ref 0 in + for i = 12 to n - 1 do + if div_sum i > i then c := !c + 1 + done; + !c + +;; + +count_abundant 100 diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index d995d935..88a56448 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -1,4 +1,5 @@ { + "abundant.ml": 21, "ackermann.ml": 125, "adler32.ml": 300286872, "anagram_check.ml": 2, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index f779c07c..34acfbec 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,11 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — abundant.ml baseline (count abundant numbers + < 100 = 21). Abundant means d(n) > n where d(n) is the proper- + divisor sum. Reuses the trial-division div_sum helper from iter + 205. 21 abundant numbers under 100, starting at 12, 18, 20, 24... + 124 baseline programs total. - 2026-05-10 Phase 5.1 — euler36.ml baseline (sum of double-base palindromes ≤ 1000 = 1772). Numbers that read the same in base 10 and base 2: 1, 3, 5, 7, 9, 33, 99, 313, 585, 717. Sum = 1772. From 58ea001f128222d9926595276f10d15b6749ccae Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 01:02:18 +0000 Subject: [PATCH 218/298] ocaml: phase 5.1 perfect.ml baseline (count perfect numbers <= 500 = 3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Perfect numbers = those where the proper-divisor sum equals n. Three exist under 500: 6, 28, 496. (8128 is the next; 33550336 the one after that.) Same div_sum machinery as euler21_small.ml / abundant.ml (the trial-division up to sqrt-n). Original 10000 limit timed out at 10 minutes under contention (496 itself takes thousands of trials at the inner loop). 500 stays under budget while still finding all three small perfects. 125 baseline programs total — milestone. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/perfect.ml | 23 +++++++++++++++++++++++ plans/ocaml-on-sx.md | 6 ++++++ 3 files changed, 30 insertions(+) create mode 100644 lib/ocaml/baseline/perfect.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 88a56448..07296ad2 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -94,6 +94,7 @@ "pancake_sort.ml": 910, "pascal.ml": 252, "peano.ml": 30, + "perfect.ml": 3, "pi_leibniz.ml": 314, "prefix_sum.ml": 66, "pretty_table.ml": 64, diff --git a/lib/ocaml/baseline/perfect.ml b/lib/ocaml/baseline/perfect.ml new file mode 100644 index 00000000..2ec3c4a9 --- /dev/null +++ b/lib/ocaml/baseline/perfect.ml @@ -0,0 +1,23 @@ +let div_sum n = + let s = ref 1 in + let i = ref 2 in + while !i * !i <= n do + if n mod !i = 0 then begin + s := !s + !i; + let q = n / !i in + if q <> !i then s := !s + q + end; + i := !i + 1 + done; + if n = 1 then 0 else !s + +let count_perfect limit = + let c = ref 0 in + for n = 2 to limit do + if div_sum n = n then c := !c + 1 + done; + !c + +;; + +count_perfect 500 diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 34acfbec..4de83aaf 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,12 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — perfect.ml baseline (count perfect numbers + ≤ 500 = 3). Perfect numbers = those where d(n) = n. Three under + 500: 6, 28, 496. (8128 is the next.) Same div_sum machinery as + euler21_small / abundant. Original 10000 limit timed out under + contention; 500 stays under budget. 125 baseline programs total — + milestone. - 2026-05-10 Phase 5.1 — abundant.ml baseline (count abundant numbers < 100 = 21). Abundant means d(n) > n where d(n) is the proper- divisor sum. Reuses the trial-division div_sum helper from iter From 0b79d4d4b4b7a8129bb4fc47f1e1ab7b2bca6b05 Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 01:17:11 +0000 Subject: [PATCH 219/298] ocaml: phase 5.1 triangle_div.ml baseline (first triangle with >10 divisors = 120) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PE12 with target = 10: let count_divisors n = let c = ref 0 in let i = ref 1 in while !i * !i <= n do if n mod !i = 0 then begin c := !c + 1; if !i * !i <> n then c := !c + 1 end; i := !i + 1 done; !c let first_triangle_with_divs target = walk triangles T(n) = T(n-1) + n until count_divisors T > target T(15) = 120 has 16 divisors — first to exceed 10. Real PE12 uses target 500 (answer 76576500); 10 stays well under budget. 126 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/triangle_div.ml | 26 ++++++++++++++++++++++++++ plans/ocaml-on-sx.md | 5 +++++ 3 files changed, 32 insertions(+) create mode 100644 lib/ocaml/baseline/triangle_div.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 07296ad2..f2b22765 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -122,6 +122,7 @@ "sum_squares.ml": 385, "tree_depth.ml": 4, "triangle.ml": 11, + "triangle_div.ml": 120, "twosum.ml": 5, "unique_set.ml": 9, "validate.ml": 417, diff --git a/lib/ocaml/baseline/triangle_div.ml b/lib/ocaml/baseline/triangle_div.ml new file mode 100644 index 00000000..20c079db --- /dev/null +++ b/lib/ocaml/baseline/triangle_div.ml @@ -0,0 +1,26 @@ +let count_divisors n = + let c = ref 0 in + let i = ref 1 in + while !i * !i <= n do + if n mod !i = 0 then begin + c := !c + 1; + if !i * !i <> n then c := !c + 1 + end; + i := !i + 1 + done; + !c + +let first_triangle_with_divs target = + let t = ref 0 in + let n = ref 0 in + let found = ref false in + while not !found do + n := !n + 1; + t := !t + !n; + if count_divisors !t > target then found := true + done; + !t + +;; + +first_triangle_with_divs 10 diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 4de83aaf..312d14ae 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,11 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — triangle_div.ml baseline (first triangle + number with > 10 divisors = 120). PE12 with target 10. T(15) = 120 + has 16 divisors {1,2,3,4,5,6,8,10,12,15,20,24,30,40,60,120} — + first to break 10. Real PE12 uses target 500 (answer 76576500); + 10 stays well under our budget. 126 baseline programs total. - 2026-05-10 Phase 5.1 — perfect.ml baseline (count perfect numbers ≤ 500 = 3). Perfect numbers = those where d(n) = n. Three under 500: 6, 28, 496. (8128 is the next.) Same div_sum machinery as From fb0e83d3a1ad45b9a55d1872d8f0f4472ad2866e Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 01:29:52 +0000 Subject: [PATCH 220/298] ocaml: phase 5.1 palindrome_sum.ml baseline (sum of 3-digit palindromes = 49500) let palindrome_sum lo hi = let total = ref 0 in for n = lo to hi do if is_pal n then total := !total + n done; !total palindrome_sum 100 999 = 49500 There are 90 three-digit palindromes (form aba; 9 choices for a, 10 for b). Average value 550, sum 49500. Companion to palindrome.ml (predicate-only) and paren_depth.ml. 127 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/palindrome_sum.ml | 19 +++++++++++++++++++ plans/ocaml-on-sx.md | 4 ++++ 3 files changed, 24 insertions(+) create mode 100644 lib/ocaml/baseline/palindrome_sum.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index f2b22765..bb8e8c68 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -89,6 +89,7 @@ "mutable_record.ml": 10, "option_match.ml": 5, "palindrome.ml": 4, + "palindrome_sum.ml": 49500, "paren_depth.ml": 7, "partition.ml": 3025, "pancake_sort.ml": 910, diff --git a/lib/ocaml/baseline/palindrome_sum.ml b/lib/ocaml/baseline/palindrome_sum.ml new file mode 100644 index 00000000..dc3fd3e4 --- /dev/null +++ b/lib/ocaml/baseline/palindrome_sum.ml @@ -0,0 +1,19 @@ +let is_pal n = + let s = string_of_int n in + let len = String.length s in + let p = ref true in + for i = 0 to len / 2 - 1 do + if s.[i] <> s.[len - 1 - i] then p := false + done; + !p + +let palindrome_sum lo hi = + let total = ref 0 in + for n = lo to hi do + if is_pal n then total := !total + n + done; + !total + +;; + +palindrome_sum 100 999 diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 312d14ae..64e0a2e2 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,10 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — palindrome_sum.ml baseline (sum of 3-digit + palindromes = 49500). 90 palindromes between 100 and 999 (form + aba; 9 choices for a, 10 for b). Sum = 49500 = 90 * 550 (mean + is 550). 127 baseline programs total. - 2026-05-10 Phase 5.1 — triangle_div.ml baseline (first triangle number with > 10 divisors = 120). PE12 with target 10. T(15) = 120 has 16 divisors {1,2,3,4,5,6,8,10,12,15,20,24,30,40,60,120} — From 836e01dbb439c312f0eb7873d4a1551acac7e57c Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 01:39:25 +0000 Subject: [PATCH 221/298] ocaml: phase 5.1 number_words.ml baseline (letter count of 1..19 spelled out = 106) 19-arm match returning the English word for each number 1..19, then sum String.length: let number_to_words n = match n with | 1 -> 'one' | 2 -> 'two' | ... | 19 -> 'nineteen' | _ -> '' total_letters 19 = 36 + 70 = 106 (1-9) (10-19) Real PE17 covers 1..1000 (answer 21124) but needs more elaborate number-to-words logic (compounds, 'and', 'thousand'). 1..19 keeps the program small while exercising literal-pattern match dispatch on many arms. 128 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/number_words.ml | 19 +++++++++++++++++++ plans/ocaml-on-sx.md | 6 ++++++ 3 files changed, 26 insertions(+) create mode 100644 lib/ocaml/baseline/number_words.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index bb8e8c68..f8367cde 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -86,6 +86,7 @@ "module_use.ml": 3, "monotonic.ml": 4, "newton_sqrt.ml": 1414, + "number_words.ml": 106, "mutable_record.ml": 10, "option_match.ml": 5, "palindrome.ml": 4, diff --git a/lib/ocaml/baseline/number_words.ml b/lib/ocaml/baseline/number_words.ml new file mode 100644 index 00000000..467b5eaa --- /dev/null +++ b/lib/ocaml/baseline/number_words.ml @@ -0,0 +1,19 @@ +let number_to_words n = + match n with + | 1 -> "one" | 2 -> "two" | 3 -> "three" | 4 -> "four" | 5 -> "five" + | 6 -> "six" | 7 -> "seven" | 8 -> "eight" | 9 -> "nine" + | 10 -> "ten" | 11 -> "eleven" | 12 -> "twelve" + | 13 -> "thirteen" | 14 -> "fourteen" | 15 -> "fifteen" + | 16 -> "sixteen" | 17 -> "seventeen" | 18 -> "eighteen" | 19 -> "nineteen" + | _ -> "" + +let total_letters limit = + let total = ref 0 in + for i = 1 to limit do + total := !total + String.length (number_to_words i) + done; + !total + +;; + +total_letters 19 diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 64e0a2e2..fee3ee91 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,12 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — number_words.ml baseline (letter count of + numbers 1-19 spelled out = 106). 19-arm match dispatch returning + the English word for each number. Sums lengths over 1..19. Real + PE17 covers 1..1000 (answer 21124) but requires more elaborate + number-to-words logic. Tests literal-pattern match with many arms. + 128 baseline programs total. - 2026-05-10 Phase 5.1 — palindrome_sum.ml baseline (sum of 3-digit palindromes = 49500). 90 palindromes between 100 and 999 (form aba; 9 choices for a, 10 for b). Sum = 49500 = 90 * 550 (mean From e77a2d3a81d711e4e138feb8b98d75d21a0f7dcc Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 01:49:23 +0000 Subject: [PATCH 222/298] ocaml: phase 5.1 zerosafe.ml baseline (Option-chained safe division, sum = 28) safe_div returns None on division by zero; safe_chain stitches two divisions, propagating None on either failure: let safe_div a b = if b = 0 then None else Some (a / b) let safe_chain a b c = match safe_div a b with | None -> None | Some q -> safe_div q c Test: safe_chain 100 2 5 = Some 10 safe_chain 100 0 5 = None -> -1 safe_chain 50 5 0 = None -> -1 safe_chain 1000 10 5 = Some 20 10 - 1 - 1 + 20 = 28 Tests Option chaining + match-on-result with sentinel default. Demonstrates the canonical 'fail-early on None' pattern. 129 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/zerosafe.ml | 14 ++++++++++++++ plans/ocaml-on-sx.md | 5 +++++ 3 files changed, 20 insertions(+) create mode 100644 lib/ocaml/baseline/zerosafe.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index f8367cde..7fa0f1b5 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -118,6 +118,7 @@ "tic_tac_toe.ml": 1, "word_freq.ml": 8, "xor_cipher.ml": 601, + "zerosafe.ml": 28, "zigzag.ml": 55, "zip_unzip.ml": 1000, "sieve.ml": 15, diff --git a/lib/ocaml/baseline/zerosafe.ml b/lib/ocaml/baseline/zerosafe.ml new file mode 100644 index 00000000..b4054999 --- /dev/null +++ b/lib/ocaml/baseline/zerosafe.ml @@ -0,0 +1,14 @@ +let safe_div a b = + if b = 0 then None else Some (a / b) + +let safe_chain a b c = + match safe_div a b with + | None -> None + | Some q -> safe_div q c + +;; + +(match safe_chain 100 2 5 with Some x -> x | None -> -1) ++ (match safe_chain 100 0 5 with Some x -> x | None -> -1) ++ (match safe_chain 50 5 0 with Some x -> x | None -> -1) ++ (match safe_chain 1000 10 5 with Some x -> x | None -> -1) diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index fee3ee91..cfebefa3 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,11 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — zerosafe.ml baseline (Option-chained safe + division, sum 10 + (-1) + (-1) + 20 = 28). safe_div returns None + on division by zero; safe_chain stitches two divisions, propagating + None on either failure. Tests Option chaining + match-on-result + with sentinel default. 129 baseline programs total. - 2026-05-10 Phase 5.1 — number_words.ml baseline (letter count of numbers 1-19 spelled out = 106). 19-arm match dispatch returning the English word for each number. Sums lengths over 1..19. Real From 63901931c41c55419ccbc390997e8b96469deef8 Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 02:05:09 +0000 Subject: [PATCH 223/298] ocaml: phase 5.1 tail_factorial.ml baseline (12! via tail-recursion = 479001600) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Single-helper tail-recursive loop threading an accumulator: let factorial n = let rec go n acc = if n <= 1 then acc else go (n - 1) (n * acc) in go n 1 factorial 12 = 479_001_600 Companion to factorial.ml (10! = 3628800 via doubly-recursive style); same answer-shape, different evaluator stress: this version has constant stack depth. 130 baseline programs total — milestone. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/tail_factorial.ml | 10 ++++++++++ plans/ocaml-on-sx.md | 6 ++++++ 3 files changed, 17 insertions(+) create mode 100644 lib/ocaml/baseline/tail_factorial.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 7fa0f1b5..3d81952e 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -114,6 +114,7 @@ "simpson_int.ml": 10000, "stable_unique.ml": 46, "subseq_check.ml": 3, + "tail_factorial.ml": 479001600, "subset_sum.ml": 8, "tic_tac_toe.ml": 1, "word_freq.ml": 8, diff --git a/lib/ocaml/baseline/tail_factorial.ml b/lib/ocaml/baseline/tail_factorial.ml new file mode 100644 index 00000000..3526cbcc --- /dev/null +++ b/lib/ocaml/baseline/tail_factorial.ml @@ -0,0 +1,10 @@ +let factorial n = + let rec go n acc = + if n <= 1 then acc + else go (n - 1) (n * acc) + in + go n 1 + +;; + +factorial 12 diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index cfebefa3..969de697 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,12 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — tail_factorial.ml baseline (12! via tail + recursion = 479001600). Single-helper tail-recursive loop + threading an accumulator. Companion to factorial.ml (10! via + doubly-recursive style); same answer-shape, different evaluator + stress (tail-call optimisation if any, or pure constant stack + depth otherwise). 130 baseline programs total — milestone. - 2026-05-10 Phase 5.1 — zerosafe.ml baseline (Option-chained safe division, sum 10 + (-1) + (-1) + 20 = 28). safe_div returns None on division by zero; safe_chain stitches two divisions, propagating From da54c3ea53954ee7e9ed4d03c9d62615bad3cb57 Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 02:26:10 +0000 Subject: [PATCH 224/298] ocaml: phase 5.1 bowling.ml baseline (10-pin bowling score, sample game = 167) Walks the pin-knockdown list applying strike/spare bonuses through a 10-frame counter: strike (10): score 10 + next 2 throws, advance i+1 spare (a + b = 10): score 10 + next 1 throw, advance i+2 open (a + b < 10): score a + b, advance i+2 Frame ten special-cases are handled implicitly: the input includes bonus throws naturally and the while-loop terminates after frame 10. bowling_score [10; 7; 3; 9; 0; 10; 0; 8; 8; 2; 0; 6; 10; 10; 10; 8; 1] = 20+19+9+18+8+10+6+30+28+19 = 167 131 baseline programs total. --- lib/ocaml/baseline/bowling.ml | 24 ++++++++++++++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 7 +++++++ 3 files changed, 32 insertions(+) create mode 100644 lib/ocaml/baseline/bowling.ml diff --git a/lib/ocaml/baseline/bowling.ml b/lib/ocaml/baseline/bowling.ml new file mode 100644 index 00000000..32cf71d0 --- /dev/null +++ b/lib/ocaml/baseline/bowling.ml @@ -0,0 +1,24 @@ +let bowling_score frames = + let arr = Array.of_list frames in + let n = Array.length arr in + let total = ref 0 in + let i = ref 0 in + let frame = ref 1 in + while !frame <= 10 && !i < n do + if arr.(!i) = 10 then begin + total := !total + 10 + arr.(!i + 1) + arr.(!i + 2); + i := !i + 1 + end else if !i + 1 < n && arr.(!i) + arr.(!i + 1) = 10 then begin + total := !total + 10 + arr.(!i + 2); + i := !i + 2 + end else begin + total := !total + arr.(!i) + arr.(!i + 1); + i := !i + 2 + end; + frame := !frame + 1 + done; + !total + +;; + +bowling_score [10; 7; 3; 9; 0; 10; 0; 8; 8; 2; 0; 6; 10; 10; 10; 8; 1] diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 3d81952e..afa8c67f 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -7,6 +7,7 @@ "anagrams.ml": 3, "atm.ml": 120, "bag.ml": 3, + "bowling.ml": 167, "bf_full.ml": 6, "bisect.ml": 141, "bigint_add.ml": 28, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 969de697..13ad9d44 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — bowling.ml baseline (10-pin bowling score + for canonical "167" PBA-style game). Walks pin-knockdown list + applying strike/spare bonuses through a 10-frame counter. Strike + consumes 1 throw + 2 bonus; spare consumes 2 throws + 1 bonus; + open frame is just the two pin counts. Frame ten special-cases + ignored (input includes the bonus throws naturally). 131 baseline + programs total. - 2026-05-10 Phase 5.1 — tail_factorial.ml baseline (12! via tail recursion = 479001600). Single-helper tail-recursive loop threading an accumulator. Companion to factorial.ml (10! via From e23aa9c273d30c849dbad094105a0ec52cd66b8d Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 02:36:37 +0000 Subject: [PATCH 225/298] ocaml: phase 5.1 reverse_int.ml baseline (digit-reverse sum = 54329) Walks digits via mod 10 / div 10, accumulating the reversed value: let reverse_int n = let m = ref n in let r = ref 0 in while !m > 0 do r := !r * 10 + !m mod 10; m := !m / 10 done; !r reverse 12345 + reverse 100 + reverse 7 = 54321 + 1 + 7 = 54329 Trailing zeros collapse (reverse 100 = 1, not 001). 132 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/reverse_int.ml | 12 ++++++++++++ plans/ocaml-on-sx.md | 5 +++++ 3 files changed, 18 insertions(+) create mode 100644 lib/ocaml/baseline/reverse_int.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index afa8c67f..209ee3f4 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -107,6 +107,7 @@ "queens.ml": 2, "quicksort.ml": 44, "roman.ml": 44, + "reverse_int.ml": 54329, "rpn.ml": 9, "run_decode.ml": 21, "run_length.ml": 11, diff --git a/lib/ocaml/baseline/reverse_int.ml b/lib/ocaml/baseline/reverse_int.ml new file mode 100644 index 00000000..7dc1bc97 --- /dev/null +++ b/lib/ocaml/baseline/reverse_int.ml @@ -0,0 +1,12 @@ +let reverse_int n = + let m = ref n in + let r = ref 0 in + while !m > 0 do + r := !r * 10 + !m mod 10; + m := !m / 10 + done; + !r + +;; + +reverse_int 12345 + reverse_int 100 + reverse_int 7 diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 13ad9d44..993a5dab 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,11 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — reverse_int.ml baseline (digit-reverse, + reverse(12345) + reverse(100) + reverse(7) = 54321 + 1 + 7 = + 54329). Walks digits via mod 10 / div 10, accumulating the + reversed value. Trailing zeros collapse (reverse 100 = 1). + 132 baseline programs total. - 2026-05-10 Phase 5.1 — bowling.ml baseline (10-pin bowling score for canonical "167" PBA-style game). Walks pin-knockdown list applying strike/spare bonuses through a 10-frame counter. Strike From 5c1b4349aabd918249bb703a7fd067d0fe39cbca Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 02:46:20 +0000 Subject: [PATCH 226/298] ocaml: phase 5.1 harshad.ml baseline (count Niven/Harshad numbers <= 100 = 33) A Harshad (or Niven) number is divisible by its digit sum: let count_harshad limit = let c = ref 0 in for n = 1 to limit do if n mod (digit_sum n) = 0 then c := !c + 1 done; !c count_harshad 100 = 33 All single-digit numbers (1..9) qualify trivially. Plus 10, 12, 18, 20, 21, 24, 27, 30, 36, 40, 42, 45, 48, 50, 54, 60, 63, 70, 72, 80, 81, 84, 90, 100 (24 more) = 33 total under 100. 133 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/harshad.ml | 19 +++++++++++++++++++ plans/ocaml-on-sx.md | 6 ++++++ 3 files changed, 26 insertions(+) create mode 100644 lib/ocaml/baseline/harshad.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 209ee3f4..03ab6707 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -59,6 +59,7 @@ "grid_paths.ml": 210, "group_consec.ml": 53, "hailstone.ml": 111, + "harshad.ml": 33, "hamming.ml": 4, "hanoi.ml": 1023, "hist.ml": 75, diff --git a/lib/ocaml/baseline/harshad.ml b/lib/ocaml/baseline/harshad.ml new file mode 100644 index 00000000..cff87551 --- /dev/null +++ b/lib/ocaml/baseline/harshad.ml @@ -0,0 +1,19 @@ +let digit_sum n = + let m = ref n in + let s = ref 0 in + while !m > 0 do + s := !s + !m mod 10; + m := !m / 10 + done; + !s + +let count_harshad limit = + let c = ref 0 in + for n = 1 to limit do + if n mod (digit_sum n) = 0 then c := !c + 1 + done; + !c + +;; + +count_harshad 100 diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 993a5dab..e121528e 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,12 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — harshad.ml baseline (count Niven/Harshad + numbers ≤ 100 = 33). A Harshad number is divisible by its digit + sum. All single-digit numbers qualify trivially (9). Plus 10, 12, + 18, 20, 21, 24, 27, 30, 36, 40, 42, 45, 48, 50, 54, 60, 63, 70, + 72, 80, 81, 84, 90, 100 (24 more) = 33 under 100. 133 baseline + programs total. - 2026-05-10 Phase 5.1 — reverse_int.ml baseline (digit-reverse, reverse(12345) + reverse(100) + reverse(7) = 54321 + 1 + 7 = 54329). Walks digits via mod 10 / div 10, accumulating the From 36e02c906a090313b86cb567bad9e04b90ff56d9 Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 03:02:17 +0000 Subject: [PATCH 227/298] ocaml: phase 5.1 pythagorean.ml baseline (primitive Pythagorean triples with hyp <= 100 = 16) Uses Euclid's formula: for coprime m > k of opposite parity, the triple (m^2 - k^2, 2mk, m^2 + k^2) is a primitive Pythagorean. let count_primitive_triples n = let c = ref 0 in for m = 2 to 50 do let kk = ref 1 in while !kk < m do if (m - !kk) mod 2 = 1 && gcd m !kk = 1 then begin let h = m * m + !kk * !kk in if h <= n then c := !c + 1 end; kk := !kk + 1 done done; !c count_primitive_triples 100 = 16 The 16 triples include the classics (3,4,5), (5,12,13), (8,15,17), (7,24,25), and end with (65,72,97). 134 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/pythagorean.ml | 19 +++++++++++++++++++ plans/ocaml-on-sx.md | 6 ++++++ 3 files changed, 26 insertions(+) create mode 100644 lib/ocaml/baseline/pythagorean.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 03ab6707..e338cc68 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -105,6 +105,7 @@ "poly_stack.ml": 5, "pow_mod.ml": 738639, "prime_factors.ml": 17, + "pythagorean.ml": 16, "queens.ml": 2, "quicksort.ml": 44, "roman.ml": 44, diff --git a/lib/ocaml/baseline/pythagorean.ml b/lib/ocaml/baseline/pythagorean.ml new file mode 100644 index 00000000..3710c0c2 --- /dev/null +++ b/lib/ocaml/baseline/pythagorean.ml @@ -0,0 +1,19 @@ +let rec gcd a b = if b = 0 then a else gcd b (a mod b) + +let count_primitive_triples n = + let c = ref 0 in + for m = 2 to 50 do + let kk = ref 1 in + while !kk < m do + if (m - !kk) mod 2 = 1 && gcd m !kk = 1 then begin + let h = m * m + !kk * !kk in + if h <= n then c := !c + 1 + end; + kk := !kk + 1 + done + done; + !c + +;; + +count_primitive_triples 100 diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index e121528e..ef2b5f7b 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,12 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — pythagorean.ml baseline (count primitive + Pythagorean triples with hypotenuse ≤ 100 = 16). Uses Euclid's + formula: for coprime m > k of opposite parity, the triple + (m² - k², 2mk, m² + k²) is primitive Pythagorean. The 16 triples + are (3,4,5), (5,12,13), (8,15,17), (7,24,25), (20,21,29), + (9,40,41), ..., (65,72,97). 134 baseline programs total. - 2026-05-10 Phase 5.1 — harshad.ml baseline (count Niven/Harshad numbers ≤ 100 = 33). A Harshad number is divisible by its digit sum. All single-digit numbers qualify trivially (9). Plus 10, 12, From 353dcb67d6bdcc7dc4e9bc915186761137c95a4c Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 03:13:36 +0000 Subject: [PATCH 228/298] ocaml: phase 5.1 partition_count.ml baseline (p(15) = 176) Counts integer partitions via classic DP: let partition_count n = let dp = Array.make (n + 1) 0 in dp.(0) <- 1; for k = 1 to n do for i = k to n do dp.(i) <- dp.(i) + dp.(i - k) done done; dp.(n) partition_count 15 = 176 Tests Array.make, .(i)<-/.(i) array access, nested for-loops, refs. 135 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/partition_count.ml | 13 +++++++++++++ plans/ocaml-on-sx.md | 6 ++++++ 3 files changed, 20 insertions(+) create mode 100644 lib/ocaml/baseline/partition_count.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index e338cc68..01db65d7 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -95,6 +95,7 @@ "palindrome_sum.ml": 49500, "paren_depth.ml": 7, "partition.ml": 3025, + "partition_count.ml": 176, "pancake_sort.ml": 910, "pascal.ml": 252, "peano.ml": 30, diff --git a/lib/ocaml/baseline/partition_count.ml b/lib/ocaml/baseline/partition_count.ml new file mode 100644 index 00000000..f91bfca8 --- /dev/null +++ b/lib/ocaml/baseline/partition_count.ml @@ -0,0 +1,13 @@ +let partition_count n = + let dp = Array.make (n + 1) 0 in + dp.(0) <- 1; + for k = 1 to n do + for i = k to n do + dp.(i) <- dp.(i) + dp.(i - k) + done + done; + dp.(n) + +;; + +partition_count 15 diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index ef2b5f7b..b4f6ac7e 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,12 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — partition_count.ml baseline (number of + integer partitions of 15 = 176). Classic DP: dp[0] = 1; for each + k from 1..n, for each i from k..n, dp[i] += dp[i - k]. O(n²) time, + O(n) space. p(15) = 176 partitions of 15. Tests Array.make, array + set/get with `.(i)<-` / `.(i)`, nested for-loops, ref deref. + 135 baseline programs total. - 2026-05-10 Phase 5.1 — pythagorean.ml baseline (count primitive Pythagorean triples with hypotenuse ≤ 100 = 16). Uses Euclid's formula: for coprime m > k of opposite parity, the triple From 76de0a20f8ade50e5ad0f4aa75a769a909719957 Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 03:22:29 +0000 Subject: [PATCH 229/298] ocaml: phase 5.1 josephus.ml baseline (n=50 k=3, survivor at position 11) Classic Josephus problem solved with the standard recurrence: let rec josephus n k = if n = 1 then 0 else (josephus (n - 1) k + k) mod n josephus 50 3 + 1 = 11 50 people stand in a circle, every 3rd is eliminated; the last survivor is at position 11 (1-indexed). Tests recursion + mod. 136 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/josephus.ml | 7 +++++++ plans/ocaml-on-sx.md | 5 +++++ 3 files changed, 13 insertions(+) create mode 100644 lib/ocaml/baseline/josephus.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 01db65d7..90b61eb0 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -75,6 +75,7 @@ "max_product3.ml": 300, "max_run.ml": 5, "mod_inverse.ml": 27, + "josephus.ml": 11, "json_pretty.ml": 24, "kadane.ml": 6, "lambda_calc.ml": 7, diff --git a/lib/ocaml/baseline/josephus.ml b/lib/ocaml/baseline/josephus.ml new file mode 100644 index 00000000..0d1970c0 --- /dev/null +++ b/lib/ocaml/baseline/josephus.ml @@ -0,0 +1,7 @@ +let rec josephus n k = + if n = 1 then 0 + else (josephus (n - 1) k + k) mod n + +;; + +josephus 50 3 + 1 diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index b4f6ac7e..bb56d80e 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,11 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — josephus.ml baseline (Josephus problem, + n=50 k=3 → survivor at position 11, 1-indexed). Uses the classic + recursive formula: J(1, k) = 0; J(n, k) = (J(n-1, k) + k) mod n. + Returns 0-indexed survivor; we add 1 for human readability. Tests + recursion, mod, integer arithmetic. 136 baseline programs total. - 2026-05-10 Phase 5.1 — partition_count.ml baseline (number of integer partitions of 15 = 176). Classic DP: dp[0] = 1; for each k from 1..n, for each i from k..n, dp[i] += dp[i - k]. O(n²) time, From 551ed44f7f71ee3d6bdbfa535bfecc8647ade9e2 Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 03:41:19 +0000 Subject: [PATCH 230/298] ocaml: phase 5.1 array literals [|...|] + lis.ml baseline (LIS = 6) Added parser support for OCaml array literal syntax: [| e1; e2; ...; en |] --> Array.of_list [e1; e2; ...; en] [||] --> Array.of_list [] Desugaring keeps the array representation unchanged (ref-of-list) since Array.of_list is a no-op constructor for that backing. Tokenizer emits [, |, |, ] as separate ops; parser detects [ followed by | and enters array-literal mode, terminating on |]. Baseline lis.ml exercises the syntax: let lis arr = let n = Array.length arr in let dp = Array.make n 1 in for i = 1 to n - 1 do for j = 0 to i - 1 do if arr.(j) < arr.(i) && dp.(j) + 1 > dp.(i) then dp.(i) <- dp.(j) + 1 done done; let best = ref 0 in for i = 0 to n - 1 do if dp.(i) > !best then best := dp.(i) done; !best lis [|10; 22; 9; 33; 21; 50; 41; 60; 80|] = 6 137 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/lis.ml | 18 ++++++++++++++ lib/ocaml/parser.sx | 41 ++++++++++++++++++++++++++++++++ plans/ocaml-on-sx.md | 10 ++++++++ 4 files changed, 70 insertions(+) create mode 100644 lib/ocaml/baseline/lis.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 90b61eb0..d2b6abc9 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -68,6 +68,7 @@ "fizz_classifier.ml": 540, "fizzbuzz.ml": 57, "flatten_tree.ml": 28, + "lis.ml": 6, "list_ops.ml": 30, "luhn.ml": 2, "mat_mul.ml": 621, diff --git a/lib/ocaml/baseline/lis.ml b/lib/ocaml/baseline/lis.ml new file mode 100644 index 00000000..3da1c520 --- /dev/null +++ b/lib/ocaml/baseline/lis.ml @@ -0,0 +1,18 @@ +let lis arr = + let n = Array.length arr in + let dp = Array.make n 1 in + for i = 1 to n - 1 do + for j = 0 to i - 1 do + if arr.(j) < arr.(i) && dp.(j) + 1 > dp.(i) then + dp.(i) <- dp.(j) + 1 + done + done; + let best = ref 0 in + for i = 0 to n - 1 do + if dp.(i) > !best then best := dp.(i) + done; + !best + +;; + +lis [|10; 22; 9; 33; 21; 50; 41; 60; 80|] diff --git a/lib/ocaml/parser.sx b/lib/ocaml/parser.sx index bd2b77d8..92773071 100644 --- a/lib/ocaml/parser.sx +++ b/lib/ocaml/parser.sx @@ -468,6 +468,47 @@ (advance-tok!) (cond ((at-op? "]") (begin (advance-tok!) (list :list))) + ;; Array literal `[| e1; e2; ...; en |]` desugars to + ;; `Array.of_list [e1; e2; ...; en]`. Empty `[||]` + ;; → `Array.of_list []`. + ((at-op? "|") + (begin + (advance-tok!) + (cond + ((at-op? "|") + (begin + (advance-tok!) + (consume! "op" "]") + (list :app + (list :field (list :con "Array") + "of_list") + (list :list)))) + (else + (let + ((items (list))) + (begin + (append! items (parse-expr-no-seq)) + (define + aloop + (fn + () + (when + (at-op? ";") + (begin + (advance-tok!) + (when + (not (at-op? "|")) + (begin + (append! items + (parse-expr-no-seq)) + (aloop))))))) + (aloop) + (consume! "op" "|") + (consume! "op" "]") + (list :app + (list :field (list :con "Array") + "of_list") + (cons :list items)))))))) (else (let ((items (list))) diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index bb56d80e..ba8e479f 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,16 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — array literals + lis.ml baseline (longest + increasing subsequence on a 9-element array = 6). Added parser + support for `[| e1; e2; ...; en |]` syntax: desugars to + `Array.of_list [e1; e2; ...; en]` (which is `ref [...]`). Empty + `[||]` → `Array.of_list []`. Tokenizer leaves `[`, `|`, `|`, `]` + as separate ops; parser detects `[` followed by `|` to switch to + array-literal mode and consumes `|` then `]` at the end. + Used in `lis [|10; 22; 9; 33; 21; 50; 41; 60; 80|] = 6`. + Tests Array.length, .(i)<-/.(i), nested for + ref. + 137 baseline programs total. - 2026-05-10 Phase 5.1 — josephus.ml baseline (Josephus problem, n=50 k=3 → survivor at position 11, 1-indexed). Uses the classic recursive formula: J(1, k) = 0; J(n, k) = (J(n-1, k) + k) mod n. From edbb03e205c2d98756427c2f23b9b6001f40e91b Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 03:50:59 +0000 Subject: [PATCH 231/298] ocaml: phase 5.1 quickselect.ml baseline (median of 9 elements = 5) Hoare quickselect with Lomuto partition: recursively narrows the range to whichever side contains the kth index. Mutates the array in place via .(i)<-v. The median (k=4) of [7;2;9;1;5;6;3;8;4] is 5. let rec quickselect arr lo hi k = if lo = hi then arr.(lo) else begin let pivot = arr.(hi) in let i = ref lo in for j = lo to hi - 1 do if arr.(j) < pivot then begin let t = arr.(!i) in arr.(!i) <- arr.(j); arr.(j) <- t; i := !i + 1 end done; ... end Exercises array literal syntax + in-place mutation in the same program, ensuring [|...|] yields a mutable backing. 138 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/quickselect.ml | 25 +++++++++++++++++++++++++ plans/ocaml-on-sx.md | 7 +++++++ 3 files changed, 33 insertions(+) create mode 100644 lib/ocaml/baseline/quickselect.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index d2b6abc9..08741db4 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -110,6 +110,7 @@ "prime_factors.ml": 17, "pythagorean.ml": 16, "queens.ml": 2, + "quickselect.ml": 5, "quicksort.ml": 44, "roman.ml": 44, "reverse_int.ml": 54329, diff --git a/lib/ocaml/baseline/quickselect.ml b/lib/ocaml/baseline/quickselect.ml new file mode 100644 index 00000000..cb652a35 --- /dev/null +++ b/lib/ocaml/baseline/quickselect.ml @@ -0,0 +1,25 @@ +let rec quickselect arr lo hi k = + if lo = hi then arr.(lo) + else begin + let pivot = arr.(hi) in + let i = ref lo in + for j = lo to hi - 1 do + if arr.(j) < pivot then begin + let t = arr.(!i) in + arr.(!i) <- arr.(j); + arr.(j) <- t; + i := !i + 1 + end + done; + let t = arr.(!i) in + arr.(!i) <- arr.(hi); + arr.(hi) <- t; + if !i = k then arr.(!i) + else if !i < k then quickselect arr (!i + 1) hi k + else quickselect arr lo (!i - 1) k + end + +;; + +let a = [|7; 2; 9; 1; 5; 6; 3; 8; 4|] in +quickselect a 0 8 4 diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index ba8e479f..a0861bdc 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — quickselect.ml baseline (Hoare quickselect + median of [7;2;9;1;5;6;3;8;4] = 5). Lomuto partition scheme: + recursively partitions on the last element as pivot, narrows the + range to the side containing the kth index. Mutates the array + in place via `.(i) <- v`. Verifies that array literal syntax + works for in-place mutation paths, not just reads. 138 baseline + programs total. - 2026-05-10 Phase 5.1 — array literals + lis.ml baseline (longest increasing subsequence on a 9-element array = 6). Added parser support for `[| e1; e2; ...; en |]` syntax: desugars to From bcaa41d1aec594b4f33376883114386f7d1d7582 Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 03:59:56 +0000 Subject: [PATCH 232/298] ocaml: phase 5.1 union_find.ml baseline (10 nodes, 6 unions, 4 components) Disjoint-set union with path compression: let make_uf n = Array.init n (fun i -> i) let rec find p x = if p.(x) = x then x else begin let r = find p p.(x) in p.(x) <- r; r end let union p x y = let rx = find p x in let ry = find p y in if rx <> ry then p.(rx) <- ry After unioning (0,1), (2,3), (4,5), (6,7), (0,2), (4,6): {0,1,2,3} {4,5,6,7} {8} {9} --> 4 components. Tests Array.init with closure, recursive find, in-place .(i)<-r. 139 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/union_find.ml | 33 ++++++++++++++++++++++++++++++++ plans/ocaml-on-sx.md | 7 +++++++ 3 files changed, 41 insertions(+) create mode 100644 lib/ocaml/baseline/union_find.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 08741db4..e6661889 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -136,6 +136,7 @@ "triangle.ml": 11, "triangle_div.ml": 120, "twosum.ml": 5, + "union_find.ml": 4, "unique_set.ml": 9, "validate.ml": 417, "word_count.ml": 3 diff --git a/lib/ocaml/baseline/union_find.ml b/lib/ocaml/baseline/union_find.ml new file mode 100644 index 00000000..391ac720 --- /dev/null +++ b/lib/ocaml/baseline/union_find.ml @@ -0,0 +1,33 @@ +let make_uf n = Array.init n (fun i -> i) + +let rec find p x = + if p.(x) = x then x + else begin + let r = find p p.(x) in + p.(x) <- r; + r + end + +let union p x y = + let rx = find p x in + let ry = find p y in + if rx <> ry then p.(rx) <- ry + +let count_components p n = + let c = ref 0 in + for i = 0 to n - 1 do + if find p i = i then c := !c + 1 + done; + !c + +;; + +let n = 10 in +let p = make_uf n in +union p 0 1; +union p 2 3; +union p 4 5; +union p 6 7; +union p 0 2; +union p 4 6; +count_components p n diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index a0861bdc..153bd948 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — union_find.ml baseline (disjoint-set union + on n=10 with 6 unions → 4 components). Path-compressing find: + recursively walks parent links, splices subtree onto root in place. + After unioning {0-1, 2-3, 0-2} → {0,1,2,3}; {4-5, 6-7, 4-6} → + {4,5,6,7}; 8 and 9 remain singletons → 4 components. Tests + Array.init, in-place .(i)<-r mutation, recursive find with + path compression. 139 baseline programs total. - 2026-05-10 Phase 5.1 — quickselect.ml baseline (Hoare quickselect median of [7;2;9;1;5;6;3;8;4] = 5). Lomuto partition scheme: recursively partitions on the last element as pivot, narrows the From 713d506bb892e8fab701f5a707b997ecece97366 Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 04:08:53 +0000 Subject: [PATCH 233/298] ocaml: phase 5.1 kmp.ml baseline (5 occurrences of "abab" in haystack) Knuth-Morris-Pratt linear-time string search: - kmp_table builds failure function in O(|pattern|) - kmp_search scans text once in O(|text|), counting matches - After a hit, k := t.(n-1) so overlapping matches still count kmp_search "abababcabababcababcc" "abab" = 5 Hits at positions 0, 2, 7, 9, 14 (overlapping at 0/2 and 7/9). Tests: nested while-inside-for, char inequality (.<>), pat.[i] string indexing, Array.make 0, combined string + array indexing. 140 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/kmp.ml | 37 ++++++++++++++++++++++++++++++++ plans/ocaml-on-sx.md | 7 ++++++ 3 files changed, 45 insertions(+) create mode 100644 lib/ocaml/baseline/kmp.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index e6661889..ec5db123 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -79,6 +79,7 @@ "josephus.ml": 11, "json_pretty.ml": 24, "kadane.ml": 6, + "kmp.ml": 5, "lambda_calc.ml": 7, "majority_vote.ml": 4, "levenshtein.ml": 11, diff --git a/lib/ocaml/baseline/kmp.ml b/lib/ocaml/baseline/kmp.ml new file mode 100644 index 00000000..18787f2e --- /dev/null +++ b/lib/ocaml/baseline/kmp.ml @@ -0,0 +1,37 @@ +let kmp_table pat = + let n = String.length pat in + let t = Array.make n 0 in + let k = ref 0 in + for i = 1 to n - 1 do + while !k > 0 && pat.[!k] <> pat.[i] do + k := t.(!k - 1) + done; + if pat.[!k] = pat.[i] then k := !k + 1; + t.(i) <- !k + done; + t + +let kmp_search text pat = + let m = String.length text in + let n = String.length pat in + if n = 0 then 0 + else begin + let t = kmp_table pat in + let count = ref 0 in + let k = ref 0 in + for i = 0 to m - 1 do + while !k > 0 && pat.[!k] <> text.[i] do + k := t.(!k - 1) + done; + if pat.[!k] = text.[i] then k := !k + 1; + if !k = n then begin + count := !count + 1; + k := t.(n - 1) + end + done; + !count + end + +;; + +kmp_search "abababcabababcababcc" "abab" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 153bd948..3094e11d 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — kmp.ml baseline (KMP string search, count + occurrences of "abab" in "abababcabababcababcc" = 5). Two-phase + classic: build failure table in O(m), then linear scan in O(n). + After a full match, set k := t.(n-1) so overlapping matches still + count (so "abab" hits at positions 0, 2, 7, 9, 14). Tests nested + while-inside-for, char comparison via `pat.[i]`, Array.make 0, + combined string + array indexing. 140 baseline programs total. - 2026-05-10 Phase 5.1 — union_find.ml baseline (disjoint-set union on n=10 with 6 unions → 4 components). Path-compressing find: recursively walks parent links, splices subtree onto root in place. From 42a506faffd4aee2403d099b8909a52a62ef7772 Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 04:20:47 +0000 Subject: [PATCH 234/298] ocaml: phase 5.1 dijkstra.ml baseline (5-node SSSP, dist(0,4) = 7) Array-based O(n^2) Dijkstra on a small directed weighted graph: edges = [| [(1, 4); (2, 1)]; (* 0 -> 1 (w=4), 2 (w=1) *) [(3, 1)]; (* 1 -> 3 (w=1) *) [(1, 2); (3, 5)]; (* 2 -> 1 (w=2), 3 (w=5) *) [(4, 3)]; (* 3 -> 4 (w=3) *) [] (* 4 sink *) |] Optimal path 0->2->1->3->4 has weight 1+2+1+3 = 7. Tests: array-of-list-of-int-pair literal, List.iter with tuple destructuring closure, in-place dist mutation, nested for + ref. 141 baseline programs total. --- lib/ocaml/baseline/dijkstra.ml | 37 ++++++++++++++++++++++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 9 ++++++++ 3 files changed, 47 insertions(+) create mode 100644 lib/ocaml/baseline/dijkstra.ml diff --git a/lib/ocaml/baseline/dijkstra.ml b/lib/ocaml/baseline/dijkstra.ml new file mode 100644 index 00000000..39d36f7b --- /dev/null +++ b/lib/ocaml/baseline/dijkstra.ml @@ -0,0 +1,37 @@ +let n = 5 + +let edges = [| + [(1, 4); (2, 1)]; + [(3, 1)]; + [(1, 2); (3, 5)]; + [(4, 3)]; + [] +|] + +let dijkstra src = + let dist = Array.make n 1000000 in + dist.(src) <- 0; + let visited = Array.make n false in + for _ = 0 to n - 1 do + let u = ref (-1) in + let best = ref 1000000 in + for v = 0 to n - 1 do + if (not visited.(v)) && dist.(v) < !best then begin + best := dist.(v); + u := v + end + done; + if !u >= 0 then begin + visited.(!u) <- true; + List.iter (fun (v, w) -> + if dist.(!u) + w < dist.(v) then + dist.(v) <- dist.(!u) + w + ) edges.(!u) + end + done; + dist + +;; + +let d = dijkstra 0 in +d.(4) diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index ec5db123..2c9d525b 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -25,6 +25,7 @@ "coin_change.ml": 6, "count_change.ml": 406, "csv.ml": 10, + "dijkstra.ml": 7, "exception_handle.ml": 4, "exception_user.ml": 26, "euler1.ml": 233168, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 3094e11d..6d09d730 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,15 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — dijkstra.ml baseline (single-source shortest + path on a 5-node weighted graph, dist 0→4 = 7). O(n²) array-based + Dijkstra: at each step, scan unvisited vertices for the minimum + tentative distance, relax outgoing edges. Adjacency stored as + `(int * int) list array` literal with int-pair tuples for + destination + weight. The optimal path 0→2→1→3→4 has weight + 1+2+1+3=7. Tests array of lists of pairs, List.iter with + destructuring closure, in-place dist mutation. 141 baseline + programs total. - 2026-05-10 Phase 5.1 — kmp.ml baseline (KMP string search, count occurrences of "abab" in "abababcabababcababcc" = 5). Two-phase classic: build failure table in O(m), then linear scan in O(n). From 7a67637826790eba6b9ecc7c5f221fc51505632b Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 04:29:58 +0000 Subject: [PATCH 235/298] ocaml: phase 5.1 lcs.ml baseline (LCS of "ABCBDAB" and "BDCAB" = 4) Classic 2D DP for longest common subsequence, optimized to use two rolling 1D arrays (prev / curr) for O(min(m,n)) space: for i = 1 to m do for j = 1 to n do if s1.[i-1] = s2.[j-1] then curr.(j) <- prev.(j-1) + 1 else if prev.(j) >= curr.(j-1) then curr.(j) <- prev.(j) else curr.(j) <- curr.(j-1) done; for j = 0 to n do prev.(j) <- curr.(j) done done; prev.(n) lcs "ABCBDAB" "BDCAB" = 4 Two valid LCS witnesses: BCAB and BDAB. Avoids Array.make_matrix (not in our runtime) by manual rolling. 142 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/lcs.ml | 23 +++++++++++++++++++++++ plans/ocaml-on-sx.md | 7 +++++++ 3 files changed, 31 insertions(+) create mode 100644 lib/ocaml/baseline/lcs.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 2c9d525b..a3a88858 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -82,6 +82,7 @@ "kadane.ml": 6, "kmp.ml": 5, "lambda_calc.ml": 7, + "lcs.ml": 4, "majority_vote.ml": 4, "levenshtein.ml": 11, "memo_fib.ml": 75025, diff --git a/lib/ocaml/baseline/lcs.ml b/lib/ocaml/baseline/lcs.ml new file mode 100644 index 00000000..bc3a53fa --- /dev/null +++ b/lib/ocaml/baseline/lcs.ml @@ -0,0 +1,23 @@ +let lcs s1 s2 = + let m = String.length s1 in + let n = String.length s2 in + let prev = Array.make (n + 1) 0 in + let curr = Array.make (n + 1) 0 in + for i = 1 to m do + for j = 1 to n do + if s1.[i - 1] = s2.[j - 1] then + curr.(j) <- prev.(j - 1) + 1 + else if prev.(j) >= curr.(j - 1) then + curr.(j) <- prev.(j) + else + curr.(j) <- curr.(j - 1) + done; + for j = 0 to n do + prev.(j) <- curr.(j) + done + done; + prev.(n) + +;; + +lcs "ABCBDAB" "BDCAB" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 6d09d730..2463a2e2 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — lcs.ml baseline (longest common subsequence + of "ABCBDAB" and "BDCAB" = 4). Rolling-array DP in O(mn) time and + O(min(m,n)) space: keep `prev` and `curr` rows, copy after each + outer step. Two LCS witnesses: "BCAB" and "BDAB". Avoids needing + `Array.make_matrix` (not in our runtime) by manual rolling. Tests + string indexing, double-nested for-loops, sibling for to copy + rows, integer compare. 142 baseline programs total. - 2026-05-10 Phase 5.1 — dijkstra.ml baseline (single-source shortest path on a 5-node weighted graph, dist 0→4 = 7). O(n²) array-based Dijkstra: at each step, scan unvisited vertices for the minimum From 57a63826e3863d03eee687ef5cf3decbbaac43ef Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 04:38:59 +0000 Subject: [PATCH 236/298] ocaml: phase 5.1 knapsack.ml baseline (0/1 knapsack, cap=8 -> 36) Standard 1D 0/1 knapsack DP with reverse inner loop: let knapsack values weights cap = let n = Array.length values in let dp = Array.make (cap + 1) 0 in for i = 0 to n - 1 do let v = values.(i) and w = weights.(i) in for c = cap downto w do let take = dp.(c - w) + v in if take > dp.(c) then dp.(c) <- take done done; dp.(cap) values: [|6; 10; 12; 15; 20|] weights: [|1; 2; 3; 4; 5|] knapsack v w 8 = 36 (* take items with weights 1, 2, 5 *) Tests for-downto + array literal access in the same hot loop. 143 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/knapsack.ml | 17 +++++++++++++++++ plans/ocaml-on-sx.md | 8 ++++++++ 3 files changed, 26 insertions(+) create mode 100644 lib/ocaml/baseline/knapsack.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index a3a88858..ebee2b32 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -81,6 +81,7 @@ "json_pretty.ml": 24, "kadane.ml": 6, "kmp.ml": 5, + "knapsack.ml": 36, "lambda_calc.ml": 7, "lcs.ml": 4, "majority_vote.ml": 4, diff --git a/lib/ocaml/baseline/knapsack.ml b/lib/ocaml/baseline/knapsack.ml new file mode 100644 index 00000000..c889825a --- /dev/null +++ b/lib/ocaml/baseline/knapsack.ml @@ -0,0 +1,17 @@ +let knapsack values weights cap = + let n = Array.length values in + let dp = Array.make (cap + 1) 0 in + for i = 0 to n - 1 do + let v = values.(i) and w = weights.(i) in + for c = cap downto w do + let take = dp.(c - w) + v in + if take > dp.(c) then dp.(c) <- take + done + done; + dp.(cap) + +;; + +let v = [|6; 10; 12; 15; 20|] in +let w = [|1; 2; 3; 4; 5|] in +knapsack v w 8 diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 2463a2e2..9338f7cc 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,14 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — knapsack.ml baseline (0/1 knapsack DP, + cap=8 with values [|6;10;12;15;20|] and weights [|1;2;3;4;5|] + → max value 36). 1D rolling DP: outer loop over items, inner + for-downto so each item used at most once. Optimal pack {1,2,5} + weighing 8 with value 36 (also matches 6+10+20). Tests + `for c = cap downto w do … done` reverse iteration plus + array literal access in the same hot loop. 143 baseline + programs total. - 2026-05-10 Phase 5.1 — lcs.ml baseline (longest common subsequence of "ABCBDAB" and "BDCAB" = 4). Rolling-array DP in O(mn) time and O(min(m,n)) space: keep `prev` and `curr` rows, copy after each From 872302ede18b73ea9bab817f333a8e9dc4e3fd3c Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 04:51:15 +0000 Subject: [PATCH 237/298] ocaml: phase 5.1 topo_sort.ml baseline (6-node DAG, all 6 ordered) Kahn's algorithm BFS topological sort: let topo_sort n adj = let in_deg = Array.make n 0 in for i = 0 to n - 1 do List.iter (fun j -> in_deg.(j) <- in_deg.(j) + 1) adj.(i) done; let q = Queue.create () in for i = 0 to n - 1 do if in_deg.(i) = 0 then Queue.push i q done; let count = ref 0 in while not (Queue.is_empty q) do let u = Queue.pop q in count := !count + 1; List.iter (fun v -> in_deg.(v) <- in_deg.(v) - 1; if in_deg.(v) = 0 then Queue.push v q ) adj.(u) done; !count Graph: 0->{1,2}; 1->{3}; 2->{3,4}; 3->{5}; 4->{5}; 5. Acyclic, so all 6 nodes can be ordered. Tests Queue.{create,push,pop,is_empty}, mutable array via closure. 144 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/topo_sort.ml | 32 ++++++++++++++++++++++++++++++++ plans/ocaml-on-sx.md | 8 ++++++++ 3 files changed, 41 insertions(+) create mode 100644 lib/ocaml/baseline/topo_sort.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index ebee2b32..13c5866c 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -129,6 +129,7 @@ "tail_factorial.ml": 479001600, "subset_sum.ml": 8, "tic_tac_toe.ml": 1, + "topo_sort.ml": 6, "word_freq.ml": 8, "xor_cipher.ml": 601, "zerosafe.ml": 28, diff --git a/lib/ocaml/baseline/topo_sort.ml b/lib/ocaml/baseline/topo_sort.ml new file mode 100644 index 00000000..e4d0296c --- /dev/null +++ b/lib/ocaml/baseline/topo_sort.ml @@ -0,0 +1,32 @@ +let topo_sort n adj = + let in_deg = Array.make n 0 in + for i = 0 to n - 1 do + List.iter (fun j -> in_deg.(j) <- in_deg.(j) + 1) adj.(i) + done; + let q = Queue.create () in + for i = 0 to n - 1 do + if in_deg.(i) = 0 then Queue.push i q + done; + let count = ref 0 in + while not (Queue.is_empty q) do + let u = Queue.pop q in + count := !count + 1; + List.iter (fun v -> + in_deg.(v) <- in_deg.(v) - 1; + if in_deg.(v) = 0 then Queue.push v q + ) adj.(u) + done; + !count + +;; + +let n = 6 in +let adj = [| + [1; 2]; + [3]; + [3; 4]; + [5]; + [5]; + [] +|] in +topo_sort n adj diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 9338f7cc..d5e6dca9 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,14 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — topo_sort.ml baseline (Kahn's algorithm + topological sort of a 6-node DAG → all 6 vertices ordered). + Standard BFS approach: compute in-degrees, seed queue with zero- + indegree nodes, pop and decrement neighbours. Graph: 0→{1,2}; + 1→{3}; 2→{3,4}; 3→{5}; 4→{5}; 5. A valid topological order is + 0,1,2,3,4,5. Returns count of sortable nodes (6 means acyclic). + Tests Queue.{create,push,pop,is_empty}, List.iter with closure + capturing `in_deg`, mutable array. 144 baseline programs total. - 2026-05-10 Phase 5.1 — knapsack.ml baseline (0/1 knapsack DP, cap=8 with values [|6;10;12;15;20|] and weights [|1;2;3;4;5|] → max value 36). 1D rolling DP: outer loop over items, inner From 74d8ade089753f44d02735468d5cb0af6140fa3b Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 05:01:08 +0000 Subject: [PATCH 238/298] ocaml: phase 5.1 count_inversions.ml baseline (12 inversions via merge sort) Modified merge sort that counts inversions during the merge step: when an element from the right half is selected, the remaining elements of the left half (mid - i + 1) all form inversions with that right element. count_inv [|8; 4; 2; 1; 3; 5; 7; 6|] = 12 Inversions of [8;4;2;1;3;5;7;6]: with 8: (8,4)(8,2)(8,1)(8,3)(8,5)(8,7)(8,6) = 7 with 4: (4,2)(4,1)(4,3) = 3 with 2: (2,1) = 1 with 7: (7,6) = 1 total = 12 Tests: let rec ... and ... mutual recursion, while + ref + array mutation, in-place sort with auxiliary scratch array. 145 baseline programs total. --- lib/ocaml/baseline/count_inversions.ml | 42 ++++++++++++++++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 7 +++++ 3 files changed, 50 insertions(+) create mode 100644 lib/ocaml/baseline/count_inversions.ml diff --git a/lib/ocaml/baseline/count_inversions.ml b/lib/ocaml/baseline/count_inversions.ml new file mode 100644 index 00000000..8daae61b --- /dev/null +++ b/lib/ocaml/baseline/count_inversions.ml @@ -0,0 +1,42 @@ +let count_inv arr = + let n = Array.length arr in + let temp = Array.make n 0 in + let count = ref 0 in + let rec merge lo mid hi = + let i = ref lo and j = ref (mid + 1) and k = ref lo in + while !i <= mid && !j <= hi do + if arr.(!i) <= arr.(!j) then begin + temp.(!k) <- arr.(!i); + i := !i + 1 + end else begin + temp.(!k) <- arr.(!j); + count := !count + (mid - !i + 1); + j := !j + 1 + end; + k := !k + 1 + done; + while !i <= mid do + temp.(!k) <- arr.(!i); + i := !i + 1; k := !k + 1 + done; + while !j <= hi do + temp.(!k) <- arr.(!j); + j := !j + 1; k := !k + 1 + done; + for x = lo to hi do + arr.(x) <- temp.(x) + done + and sort lo hi = + if lo < hi then begin + let mid = (lo + hi) / 2 in + sort lo mid; + sort (mid + 1) hi; + merge lo mid hi + end + in + sort 0 (n - 1); + !count + +;; + +count_inv [|8; 4; 2; 1; 3; 5; 7; 6|] diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 13c5866c..ea5be3fe 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -24,6 +24,7 @@ "closures.ml": 315, "coin_change.ml": 6, "count_change.ml": 406, + "count_inversions.ml": 12, "csv.ml": 10, "dijkstra.ml": 7, "exception_handle.ml": 4, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index d5e6dca9..5f4211cc 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — count_inversions.ml baseline (count + inversions of [|8;4;2;1;3;5;7;6|] = 12, via merge-sort). Modified + merge sort: when right element is taken, accumulate `mid - i + 1` + inversions for the remaining left half. Tests `let rec merge ... + and sort ...` mutually recursive bindings, complex while + ref + + array mutation, in-place sort with auxiliary array. + 145 baseline programs total. - 2026-05-10 Phase 5.1 — topo_sort.ml baseline (Kahn's algorithm topological sort of a 6-node DAG → all 6 vertices ordered). Standard BFS approach: compute in-degrees, seed queue with zero- From dfd89d998e6eda4b0ff7ae60cadccf5486515439 Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 05:11:12 +0000 Subject: [PATCH 239/298] ocaml: phase 5.1 trie.ml baseline (prefix tree, 6/9 word lookups match) Mutable-record trie with linked-list children: type trie = { mutable terminal : bool; mutable children : (char * trie) list } Insert {cat, car, card, cart, dog, doge}; lookup 9 words. Hits are exactly the inserted set: cat, car, card, cart, dog, doge = 6. Misses: ca (prefix not terminal), dogs (extends 'dog' but no 'dogs' node), x (no path). Tests: - recursive type definition with self-referential field - mutable record fields with .field <- v - Option pattern matching (Some / None) - tuple-cons pattern (k, v) :: rest 146 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/trie.ml | 36 ++++++++++++++++++++++++++++++++ plans/ocaml-on-sx.md | 10 +++++++++ 3 files changed, 47 insertions(+) create mode 100644 lib/ocaml/baseline/trie.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index ea5be3fe..269c63b2 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -140,6 +140,7 @@ "sum_squares.ml": 385, "tree_depth.ml": 4, "triangle.ml": 11, + "trie.ml": 6, "triangle_div.ml": 120, "twosum.ml": 5, "union_find.ml": 4, diff --git a/lib/ocaml/baseline/trie.ml b/lib/ocaml/baseline/trie.ml new file mode 100644 index 00000000..60f0358f --- /dev/null +++ b/lib/ocaml/baseline/trie.ml @@ -0,0 +1,36 @@ +type trie = { mutable terminal : bool; mutable children : (char * trie) list } + +let make_trie () = { terminal = false; children = [] } + +let rec lookup_child cs c = + match cs with + | [] -> None + | (k, v) :: rest -> if k = c then Some v else lookup_child rest c + +let rec insert t s i = + if i = String.length s then t.terminal <- true + else + let c = s.[i] in + match lookup_child t.children c with + | Some child -> insert child s (i + 1) + | None -> + let nc = make_trie () in + t.children <- (c, nc) :: t.children; + insert nc s (i + 1) + +let rec contains t s i = + if i = String.length s then t.terminal + else + let c = s.[i] in + match lookup_child t.children c with + | Some child -> contains child s (i + 1) + | None -> false + +;; + +let t = make_trie () in +List.iter (fun w -> insert t w 0) ["cat"; "car"; "card"; "cart"; "dog"; "doge"]; +let count = ref 0 in +List.iter (fun w -> if contains t w 0 then count := !count + 1) + ["cat"; "car"; "ca"; "card"; "cart"; "dog"; "doge"; "dogs"; "x"]; +!count diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 5f4211cc..4e42583d 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,16 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — trie.ml baseline (prefix tree, 6 of 9 + word lookups match). Mutable record `{ terminal; children }` with + `children : (char * trie) list`; insert recurses down character + by character, mutating children list when a path is missing. + Insert {cat, car, card, cart, dog, doge}; lookup {cat, car, ca, + card, cart, dog, doge, dogs, x}. The 6 hits are exactly the + inserted words; "ca", "dogs", "x" miss. + Tests recursive type definition with self-reference, mutable + records, `match … | Some / None` over option, pattern with + `(k, v) :: rest` tuple destructuring. 146 baseline programs total. - 2026-05-10 Phase 5.1 — count_inversions.ml baseline (count inversions of [|8;4;2;1;3;5;7;6|] = 12, via merge-sort). Modified merge sort: when right element is taken, accumulate `mid - i + 1` From 99f321f532b86fa5a2a8669a52807edff8e4be27 Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 05:21:14 +0000 Subject: [PATCH 240/298] ocaml: phase 5.1 mst_kruskal.ml baseline (5-node MST weight 11) Kruskal's minimum spanning tree using path-compressing union-find: edges (w, u, v): (1, 0, 1) (2, 1, 2) (3, 0, 3) (4, 2, 3) (5, 3, 4) (6, 0, 4) After sorting by weight and greedily unioning: pick (1,0,1) -> components: {0,1} {2} {3} {4} pick (2,1,2) -> {0,1,2} {3} {4} pick (3,0,3) -> {0,1,2,3} {4} skip (4,2,3) -- already connected pick (5,3,4) -> {0,1,2,3,4} skip (6,0,4) -- already connected MST weight = 1 + 2 + 3 + 5 = 11 Tests List.sort with 3-tuple destructuring lambda, compare on int, Array.init with closure, in-place array mutation in find, boolean union returning true iff merge happened. 147 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/mst_kruskal.ml | 32 +++++++++++++++++++++++++++++++ plans/ocaml-on-sx.md | 9 +++++++++ 3 files changed, 42 insertions(+) create mode 100644 lib/ocaml/baseline/mst_kruskal.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 269c63b2..675c2dd7 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -89,6 +89,7 @@ "levenshtein.ml": 11, "memo_fib.ml": 75025, "mortgage.ml": 1073, + "mst_kruskal.ml": 11, "merge_intervals.ml": 12, "merge_sort.ml": 44, "merge_two.ml": 441, diff --git a/lib/ocaml/baseline/mst_kruskal.ml b/lib/ocaml/baseline/mst_kruskal.ml new file mode 100644 index 00000000..85f8656e --- /dev/null +++ b/lib/ocaml/baseline/mst_kruskal.ml @@ -0,0 +1,32 @@ +let edges = [ + (1, 0, 1); (2, 1, 2); (3, 0, 3); (4, 2, 3); (5, 3, 4); (6, 0, 4) +] + +let make_uf n = Array.init n (fun i -> i) + +let rec find p x = + if p.(x) = x then x + else begin + let r = find p p.(x) in + p.(x) <- r; + r + end + +let union p x y = + let rx = find p x in + let ry = find p y in + if rx <> ry then begin p.(rx) <- ry; true end + else false + +let mst_weight n es = + let sorted = List.sort (fun (w1, _, _) (w2, _, _) -> compare w1 w2) es in + let p = make_uf n in + let total = ref 0 in + List.iter (fun (w, u, v) -> + if union p u v then total := !total + w + ) sorted; + !total + +;; + +mst_weight 5 edges diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 4e42583d..b2a64fea 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,15 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — mst_kruskal.ml baseline (Kruskal MST on + 5-node, 6-edge graph → MST weight 11). Sort edges by weight, + greedily add edges whose endpoints are in different components + using union-find with path compression. Edges (w,u,v) sorted: + (1,0,1) ✓ (2,1,2) ✓ (3,0,3) ✓ (4,2,3) ✗ already connected, + (5,3,4) ✓ (6,0,4) ✗. Picked weight: 1+2+3+5 = 11. Tests + List.sort with 3-tuple destructuring lambda, Array.init, in-place + array mutation (find compression), boolean-returning union. + 147 baseline programs total. - 2026-05-10 Phase 5.1 — trie.ml baseline (prefix tree, 6 of 9 word lookups match). Mutable record `{ terminal; children }` with `children : (char * trie) list`; insert recurses down character From 526ffbb5f0fdd7d3d0c67257a7c04e29d6f7bf75 Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 05:41:02 +0000 Subject: [PATCH 241/298] ocaml: phase 5.1 floyd_warshall.ml baseline (4-node APSP, dist(0,3)=9) Floyd-Warshall all-pairs shortest path with triple-nested for-loop: for k = 0 to n - 1 do for i = 0 to n - 1 do for j = 0 to n - 1 do if d.(i).(k) + d.(k).(j) < d.(i).(j) then d.(i).(j) <- d.(i).(k) + d.(k).(j) done done done Graph (4 nodes, directed): 0->1 weight 5, 0->3 weight 10, 1->2 weight 3, 2->3 weight 1 Direct edge 0->3 = 10, but path 0->1->2->3 = 5+3+1 = 9. Tests 2D array via Array.init with closure, nested .(i).(j) read + write, triple-nested for, in-place mutation under aliasing. 148 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/floyd_warshall.ml | 26 ++++++++++++++++++++++++++ plans/ocaml-on-sx.md | 8 ++++++++ 3 files changed, 35 insertions(+) create mode 100644 lib/ocaml/baseline/floyd_warshall.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 675c2dd7..dae10217 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -70,6 +70,7 @@ "fizz_classifier.ml": 540, "fizzbuzz.ml": 57, "flatten_tree.ml": 28, + "floyd_warshall.ml": 9, "lis.ml": 6, "list_ops.ml": 30, "luhn.ml": 2, diff --git a/lib/ocaml/baseline/floyd_warshall.ml b/lib/ocaml/baseline/floyd_warshall.ml new file mode 100644 index 00000000..2c642e14 --- /dev/null +++ b/lib/ocaml/baseline/floyd_warshall.ml @@ -0,0 +1,26 @@ +let inf_int = 1000000 + +let floyd n graph = + let d = Array.init n (fun i -> + Array.init n (fun j -> graph.(i).(j))) in + for k = 0 to n - 1 do + for i = 0 to n - 1 do + for j = 0 to n - 1 do + if d.(i).(k) + d.(k).(j) < d.(i).(j) then + d.(i).(j) <- d.(i).(k) + d.(k).(j) + done + done + done; + d + +;; + +let n = 4 in +let g = Array.init n (fun _ -> Array.make n inf_int) in +for i = 0 to n - 1 do g.(i).(i) <- 0 done; +g.(0).(1) <- 5; +g.(0).(3) <- 10; +g.(1).(2) <- 3; +g.(2).(3) <- 1; +let d = floyd n g in +d.(0).(3) diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index b2a64fea..b564e78a 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,14 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — floyd_warshall.ml baseline (all-pairs + shortest path on 4-node weighted graph, dist 0→3 = 9). Standard + O(n³) DP: for each intermediate vertex k, relax all (i,j) pairs. + Uses a 2D array implemented as `Array.init n (fun _ -> Array.make + n inf)`, exercising nested array indexing `g.(i).(j)`. Direct + edge 0→3 weighs 10; via 0→1→2→3 = 5+3+1 = 9. Tests 2D array + construction with closures, triple-nested for-loops, nested + `.(i).(j)<-` mutation. 148 baseline programs total. - 2026-05-10 Phase 5.1 — mst_kruskal.ml baseline (Kruskal MST on 5-node, 6-edge graph → MST weight 11). Sort edges by weight, greedily add edges whose endpoints are in different components From cccef832d9737e8cf0e22cfa9abeb567c7bb51c5 Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 05:58:05 +0000 Subject: [PATCH 242/298] ocaml: phase 5.1 manacher.ml baseline (longest palindrome "babadaba" = 7) Manacher's algorithm: insert # separators (length 2n+1) to unify odd/even cases, then maintain palindrome radii p[] alongside a running (center, right) pair to skip work via mirror reflection. Linear time. manacher "babadaba" = 7 (* witness: "abadaba", positions 1..7 *) Note: requires parenthesizing the if-expression on the rhs of <-: p.(i) <- (if pm < v then pm else v) Real OCaml parses bare `if` at <-rhs since the rhs is at expr level; our parser places <-rhs at binop level which doesn't include `if` / `match` / `let`. Workaround until we relax the binop RHS grammar. 149 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/manacher.ml | 32 ++++++++++++++++++++++++++++++++ plans/ocaml-on-sx.md | 10 ++++++++++ 3 files changed, 43 insertions(+) create mode 100644 lib/ocaml/baseline/manacher.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index dae10217..0bd5832e 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -87,6 +87,7 @@ "lambda_calc.ml": 7, "lcs.ml": 4, "majority_vote.ml": 4, + "manacher.ml": 7, "levenshtein.ml": 11, "memo_fib.ml": 75025, "mortgage.ml": 1073, diff --git a/lib/ocaml/baseline/manacher.ml b/lib/ocaml/baseline/manacher.ml new file mode 100644 index 00000000..c80a250a --- /dev/null +++ b/lib/ocaml/baseline/manacher.ml @@ -0,0 +1,32 @@ +let manacher s = + let n = String.length s in + let m = 2 * n + 1 in + let t = Array.make m '#' in + for i = 0 to n - 1 do + t.(2 * i + 1) <- s.[i] + done; + let p = Array.make m 0 in + let center = ref 0 and right = ref 0 in + let max_p = ref 0 in + for i = 0 to m - 1 do + let mirror = 2 * !center - i in + if i < !right then begin + let v = !right - i in + let pm = p.(mirror) in + p.(i) <- (if pm < v then pm else v) + end; + while i + p.(i) + 1 < m && i - p.(i) - 1 >= 0 + && t.(i + p.(i) + 1) = t.(i - p.(i) - 1) do + p.(i) <- p.(i) + 1 + done; + if i + p.(i) > !right then begin + center := i; + right := i + p.(i) + end; + if p.(i) > !max_p then max_p := p.(i) + done; + !max_p + +;; + +manacher "babadaba" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index b564e78a..e24a59e0 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,16 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — manacher.ml baseline (Manacher's longest + palindromic substring on "babadaba" = 7). Inserts `#` separators + to unify odd/even cases (string of length 2n+1), then maintains + palindrome radii in `p[]` plus a current rightmost-reach pair + (center, right). Linear time. The full input "babadaba" itself is + not a palindrome, but "abadaba" (positions 1..7) is — length 7. + Note: `p.(i) <- if pm < v then pm else v` requires explicit + parens — our parser doesn't accept `if` as the rhs of `<-` at + binop level (real OCaml does); workaround until we relax binop + RHS parsing. 149 baseline programs total. - 2026-05-10 Phase 5.1 — floyd_warshall.ml baseline (all-pairs shortest path on 4-node weighted graph, dist 0→3 = 9). Standard O(n³) DP: for each intermediate vertex k, relax all (i,j) pairs. From 4fdf6980dae9b304bebacc4afce5a93b09a19ca4 Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 06:11:57 +0000 Subject: [PATCH 243/298] ocaml: parser accepts if/match/let/fun as rhs of <- and := Previously `a.(i) <- if c then x else y` failed with "unexpected token keyword if" because parse-binop-rhs called parse-prefix for the rhs, which doesn't accept if/match/let/fun. Real OCaml allows full expressions on the rhs of <-/:=. Fix: special-case prec-1 ops in parse-binop-rhs to call parse-expr-no-seq instead of parse-prefix. The recursive parse-binop-rhs with min-prec restored after picks up any further chained <- (since both ops are right-associative with no higher-prec binops above them). Manacher baseline updated to use bare `if` on rhs of <-, removing the parens workaround from iter 235. 607/607 regressions remain clean. --- lib/ocaml/baseline/manacher.ml | 2 +- lib/ocaml/parser.sx | 32 ++++++++++++++++++++++---------- plans/ocaml-on-sx.md | 14 ++++++++++---- 3 files changed, 33 insertions(+), 15 deletions(-) diff --git a/lib/ocaml/baseline/manacher.ml b/lib/ocaml/baseline/manacher.ml index c80a250a..02bb99b3 100644 --- a/lib/ocaml/baseline/manacher.ml +++ b/lib/ocaml/baseline/manacher.ml @@ -13,7 +13,7 @@ let manacher s = if i < !right then begin let v = !right - i in let pm = p.(mirror) in - p.(i) <- (if pm < v then pm else v) + p.(i) <- if pm < v then pm else v end; while i + p.(i) + 1 < m && i - p.(i) - 1 >= 0 && t.(i + p.(i) + 1) = t.(i - p.(i) - 1) do diff --git a/lib/ocaml/parser.sx b/lib/ocaml/parser.sx index 92773071..941507ea 100644 --- a/lib/ocaml/parser.sx +++ b/lib/ocaml/parser.sx @@ -741,16 +741,28 @@ (else (begin (advance-tok!) - (let - ((rhs (parse-prefix)) - (next-min - (if - (ocaml-binop-right? op) - prec - (+ prec 1)))) - (begin - (set! rhs (parse-binop-rhs rhs next-min)) - (parse-binop-rhs (list :op op lhs rhs) min-prec)))))))))))) + ;; For `<-` and `:=`, the rhs is at expression + ;; level — accept `if/match/let/fun/...` keywords + ;; on the right (real OCaml does). Otherwise, + ;; fall back to the standard prefix-then-binop + ;; chain. + (cond + ((or (= op "<-") (= op ":=")) + (let ((rhs (parse-expr-no-seq))) + (parse-binop-rhs + (list :op op lhs rhs) min-prec))) + (else + (let + ((rhs (parse-prefix)) + (next-min + (if + (ocaml-binop-right? op) + prec + (+ prec 1)))) + (begin + (set! rhs (parse-binop-rhs rhs next-min)) + (parse-binop-rhs + (list :op op lhs rhs) min-prec)))))))))))))) (define parse-binary (fn diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index e24a59e0..19410721 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,16 +407,22 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — parser: accept `if/match/let/fun/...` as + the rhs of `<-` and `:=`. parse-binop-rhs now special-cases prec-1 + ops (`<-`, `:=`) to call parse-expr-no-seq for their right operand + instead of parse-prefix. Real OCaml accepts `a.(i) <- if c then x + else y`, `r := match e with | A -> 1 | B -> 2`, etc. Manacher + baseline (iter 235) updated to use bare `if` on rhs of `<-`, + matching the workaround note left in its previous Progress entry. + 607/607 regressions clean. Useful improvement for hot inner-loop + code that needs conditional rhs. - 2026-05-10 Phase 5.1 — manacher.ml baseline (Manacher's longest palindromic substring on "babadaba" = 7). Inserts `#` separators to unify odd/even cases (string of length 2n+1), then maintains palindrome radii in `p[]` plus a current rightmost-reach pair (center, right). Linear time. The full input "babadaba" itself is not a palindrome, but "abadaba" (positions 1..7) is — length 7. - Note: `p.(i) <- if pm < v then pm else v` requires explicit - parens — our parser doesn't accept `if` as the rhs of `<-` at - binop level (real OCaml does); workaround until we relax binop - RHS parsing. 149 baseline programs total. + 149 baseline programs total. - 2026-05-10 Phase 5.1 — floyd_warshall.ml baseline (all-pairs shortest path on 4-node weighted graph, dist 0→3 = 9). Standard O(n³) DP: for each intermediate vertex k, relax all (i,j) pairs. From 1dd350d59239af938413bd59c2fb35b26115002b Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 06:21:06 +0000 Subject: [PATCH 244/298] ocaml: phase 5.1 huffman.ml baseline (Huffman tree WPL = 224) Classic CLRS Huffman code example. ADT: type tree = Leaf of int * char | Node of int * tree * tree Build by repeatedly merging two lightest trees (sorted-list pq): let rec build_tree lst = match lst with | [t] -> t | a :: b :: rest -> let merged = Node (weight a + weight b, a, b) in build_tree (insert merged rest) weighted path length (= total Huffman bits): leaves {(5,a) (9,b) (12,c) (13,d) (16,e) (45,f)} -> 224 Tests sum-typed ADT with mixed arities, `function` keyword pattern matching, recursive sorted insert, depth-counting recursion. 150 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/huffman.ml | 38 ++++++++++++++++++++++++++++++++ plans/ocaml-on-sx.md | 9 ++++++++ 3 files changed, 48 insertions(+) create mode 100644 lib/ocaml/baseline/huffman.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 0bd5832e..6a9c8c82 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -65,6 +65,7 @@ "hamming.ml": 4, "hanoi.ml": 1023, "hist.ml": 75, + "huffman.ml": 224, "int_sqrt.ml": 1027, "is_prime.ml": 25, "fizz_classifier.ml": 540, diff --git a/lib/ocaml/baseline/huffman.ml b/lib/ocaml/baseline/huffman.ml new file mode 100644 index 00000000..774fb09a --- /dev/null +++ b/lib/ocaml/baseline/huffman.ml @@ -0,0 +1,38 @@ +type tree = Leaf of int * char | Node of int * tree * tree + +let weight = function + | Leaf (w, _) -> w + | Node (w, _, _) -> w + +let rec insert t lst = + match lst with + | [] -> [t] + | h :: rest -> + if weight t <= weight h then t :: lst + else h :: insert t rest + +let rec build_tree lst = + match lst with + | [] -> failwith "empty" + | [t] -> t + | a :: b :: rest -> + let merged = Node (weight a + weight b, a, b) in + build_tree (insert merged rest) + +let rec depth d t = + match t with + | Leaf (w, _) -> w * d + | Node (_, l, r) -> depth (d + 1) l + depth (d + 1) r + +;; + +let initial = [ + Leaf (5, 'a'); + Leaf (9, 'b'); + Leaf (12, 'c'); + Leaf (13, 'd'); + Leaf (16, 'e'); + Leaf (45, 'f') +] in +let t = build_tree initial in +depth 0 t diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 19410721..eb1d0725 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,15 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — huffman.ml baseline (Huffman tree weighted + path length on letters {(5,a) (9,b) (12,c) (13,d) (16,e) (45,f)} + = 224). Builds optimal prefix code by repeatedly merging the two + lightest trees: insert merged node back into a sorted-by-weight + list. Verifies the standard Huffman result of 224 bits for this + classic CLRS example. Tests sum-typed ADT with two arities + (Leaf of int * char | Node of int * tree * tree), `function` + keyword pattern matching, recursive sorted insert, depth-counting + recursion. 150 baseline programs total. - 2026-05-10 Phase 5.1 — parser: accept `if/match/let/fun/...` as the rhs of `<-` and `:=`. parse-binop-rhs now special-cases prec-1 ops (`<-`, `:=`) to call parse-expr-no-seq for their right operand From 19d0ef0f38b4438e9d33814c0527e34970e98ed8 Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 06:34:13 +0000 Subject: [PATCH 245/298] ocaml: phase 5.1 rolling_hash.ml baseline (Rabin-Karp, 6 "abc" matches) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Polynomial rolling hash mod 1000003 with base 257: - precompute base^(m-1) - slide window updating hash in O(1) per step - verify hash match with O(m) memcmp to skip false positives rolling_match "abcabcabcabcabcabc" "abc" = 6 Six non-overlapping copies of "abc" at positions 0,3,6,9,12,15. Tests `for _ = 0 to m - 2 do … done` unused loop variable (uses underscore wildcard pattern), Char.code arithmetic, mod arithmetic with intermediate negative subtractions, complex nested if/begin branching with inner break-via-flag. 151 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/rolling_hash.ml | 38 ++++++++++++++++++++++++++++++ plans/ocaml-on-sx.md | 9 +++++++ 3 files changed, 48 insertions(+) create mode 100644 lib/ocaml/baseline/rolling_hash.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 6a9c8c82..6b361776 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -122,6 +122,7 @@ "quickselect.ml": 5, "quicksort.ml": 44, "roman.ml": 44, + "rolling_hash.ml": 6, "reverse_int.ml": 54329, "rpn.ml": 9, "run_decode.ml": 21, diff --git a/lib/ocaml/baseline/rolling_hash.ml b/lib/ocaml/baseline/rolling_hash.ml new file mode 100644 index 00000000..2b370cf9 --- /dev/null +++ b/lib/ocaml/baseline/rolling_hash.ml @@ -0,0 +1,38 @@ +let mod_p = 1000003 +let base = 257 + +let rolling_match text pat = + let n = String.length text in + let m = String.length pat in + if m > n then 0 + else begin + let pat_hash = ref 0 in + let win_hash = ref 0 in + let high = ref 1 in + for _ = 0 to m - 2 do + high := (!high * base) mod mod_p + done; + for i = 0 to m - 1 do + pat_hash := (!pat_hash * base + Char.code pat.[i]) mod mod_p; + win_hash := (!win_hash * base + Char.code text.[i]) mod mod_p + done; + let count = ref 0 in + for i = 0 to n - m do + if !win_hash = !pat_hash then begin + let ok = ref true in + for j = 0 to m - 1 do + if text.[i + j] <> pat.[j] then ok := false + done; + if !ok then count := !count + 1 + end; + if i < n - m then begin + let drop = (Char.code text.[i] * !high) mod mod_p in + win_hash := ((!win_hash - drop + mod_p) * base + Char.code text.[i + m]) mod mod_p + end + done; + !count + end + +;; + +rolling_match "abcabcabcabcabcabc" "abc" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index eb1d0725..97c7934e 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,15 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — rolling_hash.ml baseline (Rabin-Karp + rolling hash for substring matching, count "abc" in + "abcabcabcabcabcabc" = 6). Polynomial hash mod 1000003 with + base 257; precompute base^(m-1), then slide window updating + hash in O(1) per step. Verify hash matches with O(m) memcmp to + avoid false positives. Tests `for _ = 0 to m - 2 do … done` + unused loop variable, char-code arithmetic, mod under negative + intermediate, complex nested if/begin/end branching. + 151 baseline programs total. - 2026-05-10 Phase 5.1 — huffman.ml baseline (Huffman tree weighted path length on letters {(5,a) (9,b) (12,c) (13,d) (16,e) (45,f)} = 224). Builds optimal prefix code by repeatedly merging the two From 0ef26b20f3a56070a5a0aec93dd400581388fef1 Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 06:43:46 +0000 Subject: [PATCH 246/298] ocaml: phase 5.1 binary_heap.ml baseline (min-heap sort 9 vals -> 123456789) Array-backed binary min-heap with explicit size tracking via ref: let push a size x = a.(!size) <- x; size := !size + 1; sift_up a (!size - 1) let pop a size = let m = a.(0) in size := !size - 1; a.(0) <- a.(!size); sift_down a !size 0; m Push [9;4;7;1;8;3;5;2;6], pop nine times -> 1,2,3,4,5,6,7,8,9. Fold-as-decimal: ((((((((1*10+2)*10+3)*10+4)*10+5)*10+6)*10+7)*10+8)*10+9 = 123456789. Tests recursive sift_up + sift_down, in-place array swap, parent/lchild/rchild index arithmetic, combined push/pop session with refs. 152 baseline programs total. --- lib/ocaml/baseline/binary_heap.ml | 47 +++++++++++++++++++++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 8 ++++++ 3 files changed, 56 insertions(+) create mode 100644 lib/ocaml/baseline/binary_heap.ml diff --git a/lib/ocaml/baseline/binary_heap.ml b/lib/ocaml/baseline/binary_heap.ml new file mode 100644 index 00000000..108d8ca5 --- /dev/null +++ b/lib/ocaml/baseline/binary_heap.ml @@ -0,0 +1,47 @@ +let parent i = (i - 1) / 2 +let lchild i = 2 * i + 1 +let rchild i = 2 * i + 2 + +let swap a i j = + let t = a.(i) in + a.(i) <- a.(j); + a.(j) <- t + +let rec sift_up a i = + if i > 0 && a.(parent i) > a.(i) then begin + swap a i (parent i); + sift_up a (parent i) + end + +let rec sift_down a n i = + let l = lchild i and r = rchild i in + let smallest = ref i in + if l < n && a.(l) < a.(!smallest) then smallest := l; + if r < n && a.(r) < a.(!smallest) then smallest := r; + if !smallest <> i then begin + swap a i !smallest; + sift_down a n !smallest + end + +let push a size x = + a.(!size) <- x; + size := !size + 1; + sift_up a (!size - 1) + +let pop a size = + let m = a.(0) in + size := !size - 1; + a.(0) <- a.(!size); + sift_down a !size 0; + m + +;; + +let a = Array.make 20 0 in +let s = ref 0 in +List.iter (fun x -> push a s x) [9; 4; 7; 1; 8; 3; 5; 2; 6]; +let total = ref 0 in +for _ = 1 to 9 do + total := !total * 10 + pop a s +done; +!total diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 6b361776..d521d489 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -11,6 +11,7 @@ "bf_full.ml": 6, "bisect.ml": 141, "bigint_add.ml": 28, + "binary_heap.ml": 123456789, "bits.ml": 21, "balance.ml": 3, "base_n.ml": 17, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 97c7934e..ee42b769 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,14 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — binary_heap.ml baseline (array-backed + binary min-heap, push 9 random values then pop in sorted order + → digits concat to 123456789). Standard heap mechanics: parent + /lchild/rchild index arithmetic, sift-up after push, sift-down + after pop. Push [9;4;7;1;8;3;5;2;6], pop returns 1..9 in order; + fold-as-decimal yields 123456789. Tests recursive sift_up / + sift_down, ref-tracked external size, in-place array swap, + combined push/pop with mutable closure. 152 baseline programs total. - 2026-05-10 Phase 5.1 — rolling_hash.ml baseline (Rabin-Karp rolling hash for substring matching, count "abc" in "abcabcabcabcabcabc" = 6). Polynomial hash mod 1000003 with From b4571f0f9fec9155f50a2ec72376820efaced780 Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 06:53:38 +0000 Subject: [PATCH 247/298] ocaml: phase 5.1 lev_iter.ml baseline (sum of 5 edit distances = 16) Iterative Levenshtein DP with rolling 1D arrays for O(min(m,n)) space. Distances: kitten -> sitting : 3 saturday -> sunday : 3 abc -> abc : 0 "" -> abcde : 5 intention -> execution : 5 ---------------------------- total : 16 Complementary to the existing levenshtein.ml which uses the exponential recursive form (only sums tiny strings); this one is the practical iterative variant used for real ED. Tests the recently-fixed <- with bare `if` rhs: curr.(j) <- (if m1 < c then m1 else c) + 1 153 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/lev_iter.ml | 30 ++++++++++++++++++++++++++++++ plans/ocaml-on-sx.md | 9 +++++++++ 3 files changed, 40 insertions(+) create mode 100644 lib/ocaml/baseline/lev_iter.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index d521d489..9f7e6bb4 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -90,6 +90,7 @@ "lcs.ml": 4, "majority_vote.ml": 4, "manacher.ml": 7, + "lev_iter.ml": 16, "levenshtein.ml": 11, "memo_fib.ml": 75025, "mortgage.ml": 1073, diff --git a/lib/ocaml/baseline/lev_iter.ml b/lib/ocaml/baseline/lev_iter.ml new file mode 100644 index 00000000..15e2ecd6 --- /dev/null +++ b/lib/ocaml/baseline/lev_iter.ml @@ -0,0 +1,30 @@ +let lev_iter s1 s2 = + let m = String.length s1 in + let n = String.length s2 in + let prev = Array.make (n + 1) 0 in + let curr = Array.make (n + 1) 0 in + for j = 0 to n do prev.(j) <- j done; + for i = 1 to m do + curr.(0) <- i; + for j = 1 to n do + if s1.[i - 1] = s2.[j - 1] then + curr.(j) <- prev.(j - 1) + else begin + let a = prev.(j) in + let b = curr.(j - 1) in + let c = prev.(j - 1) in + let m1 = if a < b then a else b in + curr.(j) <- (if m1 < c then m1 else c) + 1 + end + done; + for j = 0 to n do prev.(j) <- curr.(j) done + done; + prev.(n) + +;; + +lev_iter "kitten" "sitting" ++ lev_iter "saturday" "sunday" ++ lev_iter "abc" "abc" ++ lev_iter "" "abcde" ++ lev_iter "intention" "execution" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index ee42b769..5bb9b9bb 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,15 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — lev_iter.ml baseline (iterative + Levenshtein DP, sum of 5 distances = 16). Rolling-array DP + (O(min(m,n)) space). Distances: kitten→sitting=3, saturday→ + sunday=3, abc→abc=0, ""→abcde=5, intention→execution=5; + 3+3+0+5+5=16. Complementary to existing levenshtein.ml which + uses the recursive (exponential) definition; this one is the + practical iterative form used for real ED computations. Tests + the new `<- if ... then ... else ...` rhs path twice in the + same body. 153 baseline programs total. - 2026-05-10 Phase 5.1 — binary_heap.ml baseline (array-backed binary min-heap, push 9 random values then pop in sorted order → digits concat to 123456789). Standard heap mechanics: parent From bed374c9e196474c16d6aaede5474341df25ecf2 Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 07:06:29 +0000 Subject: [PATCH 248/298] ocaml: phase 5.1 tarjan_scc.ml baseline (8-node digraph, 4 SCCs) Tarjan's strongly-connected components in a single DFS using index/lowlink: graph (8 nodes, directed): 0 -> 1 -> 2 -> 0 (3-cycle) 2 -> 3 3 -> 4 4 -> 5 -> 6 -> 4 (3-cycle) 4 -> 7 SCCs: {0,1,2}, {3}, {4,5,6}, {7} = 4 components Module-level ref + array state (index_arr, lowlink, on_stack, stack, scc_count). When lowlink(v) = index(v), pop from stack until v is removed; that's a complete SCC. Tests: recursive function with module-level mutable state, nested begin/end branches inside List.iter closure, inner `let rec pop ()` traversing a ref-of-list, pattern match on [] / h :: rest cons-list shape. 154 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/tarjan_scc.ml | 53 ++++++++++++++++++++++++++++++++ plans/ocaml-on-sx.md | 9 ++++++ 3 files changed, 63 insertions(+) create mode 100644 lib/ocaml/baseline/tarjan_scc.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 9f7e6bb4..42e161f2 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -135,6 +135,7 @@ "stable_unique.ml": 46, "subseq_check.ml": 3, "tail_factorial.ml": 479001600, + "tarjan_scc.ml": 4, "subset_sum.ml": 8, "tic_tac_toe.ml": 1, "topo_sort.ml": 6, diff --git a/lib/ocaml/baseline/tarjan_scc.ml b/lib/ocaml/baseline/tarjan_scc.ml new file mode 100644 index 00000000..6ad3f90e --- /dev/null +++ b/lib/ocaml/baseline/tarjan_scc.ml @@ -0,0 +1,53 @@ +let n = 8 + +let adj = [| + [1]; + [2]; + [0; 3]; + [4]; + [5; 7]; + [6]; + [4]; + [] +|] + +let index_counter = ref 0 +let stack = ref [] +let on_stack = Array.make n false +let index_arr = Array.make n (-1) +let lowlink = Array.make n 0 +let scc_count = ref 0 + +let rec strongconnect v = + index_arr.(v) <- !index_counter; + lowlink.(v) <- !index_counter; + index_counter := !index_counter + 1; + stack := v :: !stack; + on_stack.(v) <- true; + List.iter (fun w -> + if index_arr.(w) = -1 then begin + strongconnect w; + if lowlink.(w) < lowlink.(v) then lowlink.(v) <- lowlink.(w) + end else if on_stack.(w) then begin + if index_arr.(w) < lowlink.(v) then lowlink.(v) <- index_arr.(w) + end + ) adj.(v); + if lowlink.(v) = index_arr.(v) then begin + let rec pop () = + match !stack with + | [] -> () + | w :: rest -> + stack := rest; + on_stack.(w) <- false; + if w <> v then pop () + in + pop (); + scc_count := !scc_count + 1 + end + +;; + +for v = 0 to n - 1 do + if index_arr.(v) = -1 then strongconnect v +done; +!scc_count diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 5bb9b9bb..c881517c 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,15 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — tarjan_scc.ml baseline (Tarjan's + strongly-connected components on 8-node digraph → 4 SCCs). + Graph: 0→1→2→0 (cycle) plus 2→3, 3→4, 4→5→6→4 (cycle), 4→7. + SCCs: {0,1,2}, {3}, {4,5,6}, {7} = 4 components. Single DFS + with index/lowlink, on-stack flag, pop until root when + lowlink = index. Tests recursive functions with module-level + ref + array state, nested begin/end branches inside List.iter + closure, inner `let rec pop ()` walking ref-of-list, pattern + match on `[] | h :: rest`. 154 baseline programs total. - 2026-05-10 Phase 5.1 — lev_iter.ml baseline (iterative Levenshtein DP, sum of 5 distances = 16). Rolling-array DP (O(min(m,n)) space). Distances: kitten→sitting=3, saturday→ From 4761d41a0da8f06f098a9499f242ced2dac3418a Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 21:37:41 +0000 Subject: [PATCH 249/298] ocaml: && / || short-circuit fix + bfs_grid.ml baseline (5x5 grid, dist 8) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before: `:op` handler always evaluated both operands before dispatching to ocaml-eval-op. For pure binops that's fine, but `&&` / `||` MUST short-circuit: if nr >= 0 && grid.(nr).(nc) = 0 then ... When nr = -1, real OCaml never evaluates `grid.(-1)`. Our evaluator did, and crashed with "nth: list/string and number". Fix: special-case `&&` and `||` in :op dispatch, mirroring the same pattern already used for `:=` and `<-`. Evaluate lhs, branch on it, and only evaluate rhs when needed. Latent since baseline 1 — earlier programs never triggered it because the rhs was unconditionally safe. bfs_grid.ml: shortest path through a 5x5 grid with walls. Standard BFS using Queue.{push,pop,is_empty} + Array.init for the 2D distance matrix. Path 0,0 -> ... -> 4,4 has length 8. 155 baseline programs total. --- lib/ocaml/baseline/bfs_grid.ml | 42 ++++++++++++++++++++++++++++++++ lib/ocaml/baseline/expected.json | 1 + lib/ocaml/eval.sx | 8 ++++++ plans/ocaml-on-sx.md | 13 ++++++++++ 4 files changed, 64 insertions(+) create mode 100644 lib/ocaml/baseline/bfs_grid.ml diff --git a/lib/ocaml/baseline/bfs_grid.ml b/lib/ocaml/baseline/bfs_grid.ml new file mode 100644 index 00000000..240f42fa --- /dev/null +++ b/lib/ocaml/baseline/bfs_grid.ml @@ -0,0 +1,42 @@ +let h = 5 +let w = 5 + +let grid = [| + [| 0; 0; 1; 0; 0 |]; + [| 1; 0; 1; 0; 1 |]; + [| 0; 0; 0; 0; 0 |]; + [| 0; 1; 1; 1; 0 |]; + [| 0; 0; 0; 0; 0 |] +|] + +let step dist q r c nr nc = + if nr >= 0 && nr < h && nc >= 0 && nc < w + && grid.(nr).(nc) = 0 && dist.(nr).(nc) = -1 then begin + dist.(nr).(nc) <- dist.(r).(c) + 1; + Queue.push (nr * 10 + nc) q + end + +let bfs sr sc tr tc = + let dist = Array.init h (fun _ -> Array.make w (-1)) in + let q = Queue.create () in + dist.(sr).(sc) <- 0; + Queue.push (sr * 10 + sc) q; + let go = ref true in + while !go do + if Queue.is_empty q then go := false + else if dist.(tr).(tc) <> -1 then go := false + else begin + let rc = Queue.pop q in + let r = rc / 10 in + let c = rc mod 10 in + step dist q r c (r - 1) c; + step dist q r c (r + 1) c; + step dist q r c r (c - 1); + step dist q r c r (c + 1) + end + done; + dist.(tr).(tc) + +;; + +bfs 0 0 4 4 diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 42e161f2..e09b1de9 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -16,6 +16,7 @@ "balance.ml": 3, "base_n.ml": 17, "bfs.ml": 6, + "bfs_grid.ml": 8, "btree.ml": 39, "brainfuck.ml": 75, "bsearch.ml": 7, diff --git a/lib/ocaml/eval.sx b/lib/ocaml/eval.sx index 71b2eabc..0de8c408 100644 --- a/lib/ocaml/eval.sx +++ b/lib/ocaml/eval.sx @@ -657,6 +657,14 @@ (error (str "ocaml-eval: <- expects a field-access lhs, got " (ocaml-tag-of lhs-ast))))))) + ;; && and || short-circuit — must NOT evaluate rhs when + ;; lhs already decides. Mirrors real OCaml semantics. + ((= op "&&") + (let ((lhs (ocaml-eval (nth ast 2) env))) + (if lhs (ocaml-eval (nth ast 3) env) false))) + ((= op "||") + (let ((lhs (ocaml-eval (nth ast 2) env))) + (if lhs true (ocaml-eval (nth ast 3) env)))) (else (ocaml-eval-op op (ocaml-eval (nth ast 2) env) diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index c881517c..f40cd5f2 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,19 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — `&&` / `||` short-circuit fix + bfs_grid.ml + baseline (BFS shortest path on 5×5 grid with walls, dist 8). The + evaluator's `:op` handler was evaluating BOTH sides of `&&`/`||` + before dispatching, mismatching OCaml's left-to-right + short-circuit semantics. This was invisible in earlier baselines + because their rhs was always safe regardless of lhs, but a guard + like `if nr >= 0 && grid.(nr).(nc) = 0 then …` crashes with + "nth: list/string and number" when nr = -1 because `grid.(-1)` + still gets evaluated. Fix: dispatch `&&` and `||` specially in + `:op` to evaluate lhs first and only evaluate rhs if needed. + Bug latent since baseline 1; surfaced by bfs_grid with bounds- + checked grid access. bfs_grid optimal path 0→1→1→2→2→3→3→3→4 of + weighted length 8. 155 baseline programs total. - 2026-05-10 Phase 5.1 — tarjan_scc.ml baseline (Tarjan's strongly-connected components on 8-node digraph → 4 SCCs). Graph: 0→1→2→0 (cycle) plus 2→3, 3→4, 4→5→6→4 (cycle), 4→7. From e057d9f18fd5d0914554c02adc03864cced2ba87 Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 21:47:52 +0000 Subject: [PATCH 250/298] ocaml: phase 5.1 next_permutation.ml baseline (5! - 1 = 119 successors) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Standard in-place next-permutation (Narayana's algorithm): let next_perm a = let n = Array.length a in let i = ref (n - 2) in while !i >= 0 && a.(!i) >= a.(!i + 1) do i := !i - 1 done; if !i < 0 then false else begin let j = ref (n - 1) in while a.(!j) <= a.(!i) do j := !j - 1 done; swap a.(!i) a.(!j); reverse a (!i + 1) (n - 1); true end Starting from [1;2;3;4;5], next_perm returns true 119 times then false (when reverse-sorted). 5! - 1 = 119. Tests guarded `while … && a.(!i) … do` loops that rely on the iter-242 short-circuit fix. 156 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/next_permutation.ml | 30 ++++++++++++++++++++++++++ plans/ocaml-on-sx.md | 9 ++++++++ 3 files changed, 40 insertions(+) create mode 100644 lib/ocaml/baseline/next_permutation.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index e09b1de9..d1aa3488 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -102,6 +102,7 @@ "module_use.ml": 3, "monotonic.ml": 4, "newton_sqrt.ml": 1414, + "next_permutation.ml": 119, "number_words.ml": 106, "mutable_record.ml": 10, "option_match.ml": 5, diff --git a/lib/ocaml/baseline/next_permutation.ml b/lib/ocaml/baseline/next_permutation.ml new file mode 100644 index 00000000..7cee449e --- /dev/null +++ b/lib/ocaml/baseline/next_permutation.ml @@ -0,0 +1,30 @@ +let next_perm a = + let n = Array.length a in + let i = ref (n - 2) in + while !i >= 0 && a.(!i) >= a.(!i + 1) do + i := !i - 1 + done; + if !i < 0 then false + else begin + let j = ref (n - 1) in + while a.(!j) <= a.(!i) do + j := !j - 1 + done; + let t = a.(!i) in a.(!i) <- a.(!j); a.(!j) <- t; + let lo = ref (!i + 1) and hi = ref (n - 1) in + while !lo < !hi do + let t = a.(!lo) in a.(!lo) <- a.(!hi); a.(!hi) <- t; + lo := !lo + 1; + hi := !hi - 1 + done; + true + end + +;; + +let a = [| 1; 2; 3; 4; 5 |] in +let count = ref 0 in +while next_perm a do + count := !count + 1 +done; +!count diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index f40cd5f2..0646c2ec 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,15 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — next_permutation.ml baseline (count + permutations of [1;2;3;4;5] via Narayana's algorithm = 119). + Standard in-place algorithm: find largest i with a.(i) < a.(i+1), + largest j > i with a.(j) > a.(i), swap, reverse suffix from i+1. + Returns false when array is the reverse-sorted (final) + permutation. 5! = 120, minus the initial = 119 successful calls. + Tests guarded `while … >= 0 && a.(!i) >= a.(!i + 1) do …` + pattern using the new short-circuit semantics (would loop or + crash without iter-242 fix). 156 baseline programs total. - 2026-05-10 Phase 5.1 — `&&` / `||` short-circuit fix + bfs_grid.ml baseline (BFS shortest path on 5×5 grid with walls, dist 8). The evaluator's `:op` handler was evaluating BOTH sides of `&&`/`||` From 3ea8967571a201a5f6fb397afa3e2fbb65a5dd0e Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 21:57:42 +0000 Subject: [PATCH 251/298] ocaml: phase 5.1 flood_fill.ml baseline (largest grid component = 7) Recursive 4-way flood fill from every unvisited 1-cell: let rec flood visited r c = if r < 0 || r >= h || c < 0 || c >= w then 0 else if visited.(r).(c) || grid.(r).(c) = 0 then 0 else begin visited.(r).(c) <- true; 1 + flood visited (r - 1) c + flood visited (r + 1) c + flood visited r (c - 1) + flood visited r (c + 1) end Grid (1s shown as #, 0s as .): # # . # # # . . . # . . # . . # # # # . . . . # # Largest component: {(2,2),(3,0),(3,1),(3,2),(3,3),(4,3),(4,4)} = 7. Bounds check r >= 0 must short-circuit before visited/grid reads; relies on the && / || fix from iter 242. 157 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/flood_fill.ml | 38 ++++++++++++++++++++++++++++++++ plans/ocaml-on-sx.md | 8 +++++++ 3 files changed, 47 insertions(+) create mode 100644 lib/ocaml/baseline/flood_fill.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index d1aa3488..26f54877 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -73,6 +73,7 @@ "fizz_classifier.ml": 540, "fizzbuzz.ml": 57, "flatten_tree.ml": 28, + "flood_fill.ml": 7, "floyd_warshall.ml": 9, "lis.ml": 6, "list_ops.ml": 30, diff --git a/lib/ocaml/baseline/flood_fill.ml b/lib/ocaml/baseline/flood_fill.ml new file mode 100644 index 00000000..bc2a7e25 --- /dev/null +++ b/lib/ocaml/baseline/flood_fill.ml @@ -0,0 +1,38 @@ +let h = 5 +let w = 5 + +let grid = [| + [| 1; 1; 0; 1; 1 |]; + [| 1; 0; 0; 0; 1 |]; + [| 0; 0; 1; 0; 0 |]; + [| 1; 1; 1; 1; 0 |]; + [| 0; 0; 0; 1; 1 |] +|] + +let rec flood visited r c = + if r < 0 || r >= h || c < 0 || c >= w then 0 + else if visited.(r).(c) || grid.(r).(c) = 0 then 0 + else begin + visited.(r).(c) <- true; + 1 + flood visited (r - 1) c + + flood visited (r + 1) c + + flood visited r (c - 1) + + flood visited r (c + 1) + end + +let largest_component () = + let visited = Array.init h (fun _ -> Array.make w false) in + let best = ref 0 in + for r = 0 to h - 1 do + for c = 0 to w - 1 do + if grid.(r).(c) = 1 && not visited.(r).(c) then begin + let s = flood visited r c in + if s > !best then best := s + end + done + done; + !best + +;; + +largest_component () diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 0646c2ec..e2a86a11 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,14 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — flood_fill.ml baseline (largest connected + component in 5×5 grid = 7). Recursive 4-direction flood from + every unvisited 1-cell; bounds check short-circuits before the + visited/grid reads (relies on iter-242 fix). The maximal + component in the test grid spans {(2,2), (3,0), (3,1), (3,2), + (3,3), (4,3), (4,4)} = 7 cells. Tests recursive function with + 4-way self-call, in-place visited array mutation, nested + short-circuited bounds + content guards. 157 baseline programs total. - 2026-05-10 Phase 5.1 — next_permutation.ml baseline (count permutations of [1;2;3;4;5] via Narayana's algorithm = 119). Standard in-place algorithm: find largest i with a.(i) < a.(i+1), From 5eed0dd5f59fcca811c892975c9a9f2a80cdfced Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 22:07:17 +0000 Subject: [PATCH 252/298] ocaml: phase 5.1 coin_min.ml baseline (67 cents in US coins = 6) Minimum-coin DP with -1 sentinel for unreachable values: let coin_min coins amount = let dp = Array.make (amount + 1) (-1) in dp.(0) <- 0; for i = 1 to amount do List.iter (fun c -> if c <= i && dp.(i - c) >= 0 then begin let cand = dp.(i - c) + 1 in if dp.(i) < 0 || cand < dp.(i) then dp.(i) <- cand end ) coins done; dp.(amount) coin_min [1; 5; 10; 25] 67 = 6 (* 25+25+10+5+1+1 *) Tests `if c <= i && dp.(i-c) >= 0 then` short-circuit guard; relies on iter-242 fix so dp.(i-c) is not evaluated when c > i. 158 baseline programs total. --- lib/ocaml/baseline/coin_min.ml | 16 ++++++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 7 +++++++ 3 files changed, 24 insertions(+) create mode 100644 lib/ocaml/baseline/coin_min.ml diff --git a/lib/ocaml/baseline/coin_min.ml b/lib/ocaml/baseline/coin_min.ml new file mode 100644 index 00000000..bbf7c34f --- /dev/null +++ b/lib/ocaml/baseline/coin_min.ml @@ -0,0 +1,16 @@ +let coin_min coins amount = + let dp = Array.make (amount + 1) (-1) in + dp.(0) <- 0; + for i = 1 to amount do + List.iter (fun c -> + if c <= i && dp.(i - c) >= 0 then begin + let cand = dp.(i - c) + 1 in + if dp.(i) < 0 || cand < dp.(i) then dp.(i) <- cand + end + ) coins + done; + dp.(amount) + +;; + +coin_min [1; 5; 10; 25] 67 diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 26f54877..e3b5f06a 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -25,6 +25,7 @@ "catalan.ml": 42, "closures.ml": 315, "coin_change.ml": 6, + "coin_min.ml": 6, "count_change.ml": 406, "count_inversions.ml": 12, "csv.ml": 10, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index e2a86a11..ed2dd5ae 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — coin_min.ml baseline (minimum-coin change + for 67¢ with US denominations = 6 coins). DP with -1 sentinel + for unreachable values: `dp.(i) := min over coins c of dp.(i-c)+1 + when dp.(i-c) >= 0`. 67 = 25+25+10+5+1+1 = 6 coins. Tests `if + c <= i && dp.(i - c) >= 0 then …` guard short-circuit; without + iter-242 fix the dp.(i-c) read would crash for c > i. 158 + baseline programs total. - 2026-05-10 Phase 5.1 — flood_fill.ml baseline (largest connected component in 5×5 grid = 7). Recursive 4-direction flood from every unvisited 1-cell; bounds check short-circuits before the From bcb7db2ea469207babad3729b78f73726336648f Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 22:17:40 +0000 Subject: [PATCH 253/298] ocaml: phase 5.1 radix_sort.ml baseline (LSD radix sort, sentinel 802002) LSD radix sort over base 10 digits. Per pass: - 10 bucket-refs created via Array.init 10 (fun _ -> ref []) (each closure call yields a distinct list cell) - scan array, append each value to its digit's bucket - flatten buckets back to the array in order Input [170;45;75;90;802;24;2;66] Output [2;24;45;66;75;90;170;802] Sentinel: a.(0) + a.(7)*1000 = 2 + 802*1000 = 802002. Tests array-of-refs with !buckets.(d) deref, list-mode bucket sort within in-place array sort, unused for-loop var (`for _ = 1 to maxd`). 159 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/radix_sort.ml | 40 ++++++++++++++++++++++++++++++++ plans/ocaml-on-sx.md | 9 +++++++ 3 files changed, 50 insertions(+) create mode 100644 lib/ocaml/baseline/radix_sort.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index e3b5f06a..f980e5bf 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -127,6 +127,7 @@ "queens.ml": 2, "quickselect.ml": 5, "quicksort.ml": 44, + "radix_sort.ml": 802002, "roman.ml": 44, "rolling_hash.ml": 6, "reverse_int.ml": 54329, diff --git a/lib/ocaml/baseline/radix_sort.ml b/lib/ocaml/baseline/radix_sort.ml new file mode 100644 index 00000000..82353d35 --- /dev/null +++ b/lib/ocaml/baseline/radix_sort.ml @@ -0,0 +1,40 @@ +let max_digit n arr = + let m = ref arr.(0) in + for i = 1 to n - 1 do + if arr.(i) > !m then m := arr.(i) + done; + let d = ref 0 in + let x = ref !m in + while !x > 0 do + d := !d + 1; + x := !x / 10 + done; + !d + +let radix_sort arr = + let n = Array.length arr in + let maxd = max_digit n arr in + let exp = ref 1 in + for _ = 1 to maxd do + let buckets = Array.init 10 (fun _ -> ref []) in + for i = 0 to n - 1 do + let digit = (arr.(i) / !exp) mod 10 in + let b = buckets.(digit) in + b := arr.(i) :: !b + done; + let k = ref 0 in + for d = 0 to 9 do + let xs = List.rev !(buckets.(d)) in + List.iter (fun v -> + arr.(!k) <- v; + k := !k + 1 + ) xs + done; + exp := !exp * 10 + done + +;; + +let a = [| 170; 45; 75; 90; 802; 24; 2; 66 |] in +radix_sort a; +a.(0) + a.(7) * 1000 diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index ed2dd5ae..253ec778 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,15 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — radix_sort.ml baseline (LSD radix sort, + fingerprint a.(0) + a.(7)*1000 = 802002). 8-element array + [170;45;75;90;802;24;2;66] sorts to [2;24;45;66;75;90;170;802]. + Sentinel encoding 2 + 802*1000 = 802002. Uses `Array.init 10 + (fun _ -> ref [])` to allocate 10 fresh per-digit bucket cells + per pass (each closure call yields a distinct ref). Tests + array-of-refs with `!buckets.(d)` deref pattern, list-mode + bucket sort within in-place array sort, `for _ = 1 to maxd` + unused loop variable. 159 baseline programs total. - 2026-05-10 Phase 5.1 — coin_min.ml baseline (minimum-coin change for 67¢ with US denominations = 6 coins). DP with -1 sentinel for unreachable values: `dp.(i) := min over coins c of dp.(i-c)+1 From 5384ff6c42a8e11f06d0fdfb36c34b32a89ab965 Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 22:27:18 +0000 Subject: [PATCH 254/298] ocaml: phase 5.1 topo_dfs.ml baseline (DFS topo sort, fingerprint 24135) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DFS topological sort — recurse on out-edges first, prepend after: let rec dfs v = if not visited.(v) then begin visited.(v) <- true; List.iter dfs adj.(v); order := v :: !order end Same 6-node DAG as iter 230's Kahn's-algorithm baseline: 0 -> {1, 2} 1 -> {3} 2 -> {3, 4} 3 -> {5} 4 -> {5} 5 DFS order: [0; 2; 4; 1; 3; 5] Horner fold: 0->0->2->24->241->2413->24135. Complementary to topo_sort.ml (Kahn's BFS); tests recursive DFS with no explicit stack + List.fold_left as a horner reduction. 160 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/topo_dfs.ml | 26 ++++++++++++++++++++++++++ plans/ocaml-on-sx.md | 9 +++++++++ 3 files changed, 36 insertions(+) create mode 100644 lib/ocaml/baseline/topo_dfs.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index f980e5bf..f83a769d 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -143,6 +143,7 @@ "tarjan_scc.ml": 4, "subset_sum.ml": 8, "tic_tac_toe.ml": 1, + "topo_dfs.ml": 24135, "topo_sort.ml": 6, "word_freq.ml": 8, "xor_cipher.ml": 601, diff --git a/lib/ocaml/baseline/topo_dfs.ml b/lib/ocaml/baseline/topo_dfs.ml new file mode 100644 index 00000000..172b2b78 --- /dev/null +++ b/lib/ocaml/baseline/topo_dfs.ml @@ -0,0 +1,26 @@ +let n = 6 +let adj = [| + [1; 2]; + [3]; + [3; 4]; + [5]; + [5]; + [] +|] + +let visited = Array.make n false +let order = ref [] + +let rec dfs v = + if not visited.(v) then begin + visited.(v) <- true; + List.iter dfs adj.(v); + order := v :: !order + end + +;; + +for v = 0 to n - 1 do + dfs v +done; +List.fold_left (fun acc v -> acc * 10 + v) 0 !order diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 253ec778..935dd1d9 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,15 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — topo_dfs.ml baseline (DFS-based topo sort + on the same 6-node DAG as topo_sort.ml, digit-fingerprint 24135). + Cons each node onto the order list AFTER recursing on all its + out-edges, giving a reverse topological order. Same graph + (0→{1,2}; 1→{3}; 2→{3,4}; 3→{5}; 4→{5}; 5) produces order + [0;2;4;1;3;5]. Reduce: 0→0→2→24→241→2413→24135. Complementary + to Kahn's (iter 230); tests recursive DFS without an explicit + stack and List.fold_left as a horner reduction. 160 baseline + programs total. - 2026-05-10 Phase 5.1 — radix_sort.ml baseline (LSD radix sort, fingerprint a.(0) + a.(7)*1000 = 802002). 8-element array [170;45;75;90;802;24;2;66] sorts to [2;24;45;66;75;90;170;802]. From c69a7694c8e64e87732728f5638f500c5a8f629e Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 22:37:44 +0000 Subject: [PATCH 255/298] ocaml: phase 5.1 min_cost_path.ml baseline (4x4 grid DP, optimal cost 12) Standard 2D DP for min-cost path with right/down moves only: dp[i][j] = min(dp[i-1][j], dp[i][j-1]) + cost[i][j] cost: dp: 1 3 1 2 1 4 5 7 1 5 1 3 2 7 6 9 4 2 1 4 6 8 7 11 1 6 2 3 7 13 9 12 Optimal cost from (0,0) to (3,3) = 12. Tests nested 2D arrays via Array.init + Array.make, double-nested for-loops with branched edges (first row, first column, general), mixed .(i-1).(j) read + .(i).(j)<- write on the same DP array. 161 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/min_cost_path.ml | 31 +++++++++++++++++++++++++++++ plans/ocaml-on-sx.md | 10 ++++++++++ 3 files changed, 42 insertions(+) create mode 100644 lib/ocaml/baseline/min_cost_path.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index f83a769d..d3f2cbbf 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -101,6 +101,7 @@ "merge_intervals.ml": 12, "merge_sort.ml": 44, "merge_two.ml": 441, + "min_cost_path.ml": 12, "module_use.ml": 3, "monotonic.ml": 4, "newton_sqrt.ml": 1414, diff --git a/lib/ocaml/baseline/min_cost_path.ml b/lib/ocaml/baseline/min_cost_path.ml new file mode 100644 index 00000000..0e385c2e --- /dev/null +++ b/lib/ocaml/baseline/min_cost_path.ml @@ -0,0 +1,31 @@ +let h = 4 +let w = 4 + +let cost = [| + [| 1; 3; 1; 2 |]; + [| 1; 5; 1; 3 |]; + [| 4; 2; 1; 4 |]; + [| 1; 6; 2; 3 |] +|] + +let min_cost_path () = + let dp = Array.init h (fun _ -> Array.make w 0) in + dp.(0).(0) <- cost.(0).(0); + for j = 1 to w - 1 do + dp.(0).(j) <- dp.(0).(j - 1) + cost.(0).(j) + done; + for i = 1 to h - 1 do + dp.(i).(0) <- dp.(i - 1).(0) + cost.(i).(0) + done; + for i = 1 to h - 1 do + for j = 1 to w - 1 do + let a = dp.(i - 1).(j) in + let b = dp.(i).(j - 1) in + dp.(i).(j) <- (if a < b then a else b) + cost.(i).(j) + done + done; + dp.(h - 1).(w - 1) + +;; + +min_cost_path () diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 935dd1d9..e8ec7f72 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,16 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — min_cost_path.ml baseline (min-cost path + through 4×4 cost grid, top-left to bottom-right with moves + right/down only, optimal = 12). Standard 2D DP: dp[i][j] = min + of (dp[i-1][j], dp[i][j-1]) + cost[i][j]. Cost matrix yields + optimal path 1→1→2→1→1→1→2→3 = 12 (visiting (0,0),(1,0),(2,1), + (2,2),(2,3),(3,3)? actually the path is row-by-row). Tests + nested 2D arrays via Array.init + Array.make, double-nested + for-loops with branched edges (first row, first column, then + general case), `dp.(i-1).(j)` 2-D index read + `dp.(i).(j)<-` + 2-D write. 161 baseline programs total. - 2026-05-10 Phase 5.1 — topo_dfs.ml baseline (DFS-based topo sort on the same 6-node DAG as topo_sort.ml, digit-fingerprint 24135). Cons each node onto the order list AFTER recursing on all its From 62712accdd93dc82601e049b6fa2a35cc67c9047 Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 22:47:22 +0000 Subject: [PATCH 256/298] ocaml: phase 5.1 polygon_area.ml baseline (pentagon 2x area = 32) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Shoelace formula on a pentagon with integer vertices: pts = [(0,0); (4,0); (4,3); (2,5); (0,3)] 2 * area = | Σ (x_i * y_{i+1} - x_{i+1} * y_i) | = | 0*0 - 4*0 + 4*3 - 4*0 + 4*5 - 2*3 + 2*3 - 0*5 + 0*0 - 0*3 | = 32 Returns the doubled form (32) to stay integral. Tests: - let (x1, y1) = arr.(i) in -- tuple destructure from array - arr.((i + 1) mod n) -- modular wrap-around index - if a < 0 then - a else a -- prefix - negation 162 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/polygon_area.ml | 17 +++++++++++++++++ plans/ocaml-on-sx.md | 9 +++++++++ 3 files changed, 27 insertions(+) create mode 100644 lib/ocaml/baseline/polygon_area.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index d3f2cbbf..4dc27692 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -122,6 +122,7 @@ "prefix_sum.ml": 66, "pretty_table.ml": 64, "poly_stack.ml": 5, + "polygon_area.ml": 32, "pow_mod.ml": 738639, "prime_factors.ml": 17, "pythagorean.ml": 16, diff --git a/lib/ocaml/baseline/polygon_area.ml b/lib/ocaml/baseline/polygon_area.ml new file mode 100644 index 00000000..a22d13e2 --- /dev/null +++ b/lib/ocaml/baseline/polygon_area.ml @@ -0,0 +1,17 @@ +let pts = [(0, 0); (4, 0); (4, 3); (2, 5); (0, 3)] + +let polygon_2area pts = + let arr = Array.of_list pts in + let n = Array.length arr in + let acc = ref 0 in + for i = 0 to n - 1 do + let (x1, y1) = arr.(i) in + let (x2, y2) = arr.((i + 1) mod n) in + acc := !acc + x1 * y2 - x2 * y1 + done; + let a = !acc in + if a < 0 then - a else a + +;; + +polygon_2area pts diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index e8ec7f72..b607b3c5 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,15 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — polygon_area.ml baseline (shoelace formula + on pentagon, returns 2× area = 32). Vertices (0,0), (4,0), (4,3), + (2,5), (0,3); shoelace sum |Σ(x_i·y_{i+1} − x_{i+1}·y_i)| = 32 so + the area is 16, and we return the doubled form to stay integral. + Tests `let (x1, y1) = arr.(i) in` tuple destructuring from array + access (previously suspected broken in bfs_grid but works here — + the iter-242 bug was actually `&&` short-circuit, not destructure), + modular wrap-around `arr.((i+1) mod n)`, prefix-`-` negation. + 162 baseline programs total. - 2026-05-10 Phase 5.1 — min_cost_path.ml baseline (min-cost path through 4×4 cost grid, top-left to bottom-right with moves right/down only, optimal = 12). Standard 2D DP: dp[i][j] = min From 32f6c4ee0c6108b9bbb8b5896d810cadfd7c94df Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 22:58:13 +0000 Subject: [PATCH 257/298] ocaml: phase 5.1 egg_drop.ml baseline (2 eggs, 36 floors -> 8 trials) Classic egg-drop puzzle DP: dp[e][f] = 1 + min over k in [1, f] of max(dp[e-1][k-1], dp[e][f-k]) For 2 eggs over 36 floors, the optimal worst-case is 8 trials (closed form: triangular number bound). Tests 2D DP with triple-nested for-loops, max-of-two via inline if, large sentinel constant (100000000), mixed shifted indexing (e-1) and (f-k) where both shift independently. 163 baseline programs total. --- lib/ocaml/baseline/egg_drop.ml | 26 ++++++++++++++++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 8 ++++++++ 3 files changed, 35 insertions(+) create mode 100644 lib/ocaml/baseline/egg_drop.ml diff --git a/lib/ocaml/baseline/egg_drop.ml b/lib/ocaml/baseline/egg_drop.ml new file mode 100644 index 00000000..756300ec --- /dev/null +++ b/lib/ocaml/baseline/egg_drop.ml @@ -0,0 +1,26 @@ +let egg_drop eggs floors = + let dp = Array.init (eggs + 1) (fun _ -> Array.make (floors + 1) 0) in + for f = 1 to floors do + dp.(1).(f) <- f + done; + for e = 1 to eggs do + dp.(e).(0) <- 0; + dp.(e).(1) <- 1 + done; + for e = 2 to eggs do + for f = 2 to floors do + let best = ref 100000000 in + for k = 1 to f do + let bre = dp.(e - 1).(k - 1) in + let sur = dp.(e).(f - k) in + let cand = 1 + (if bre > sur then bre else sur) in + if cand < !best then best := cand + done; + dp.(e).(f) <- !best + done + done; + dp.(eggs).(floors) + +;; + +egg_drop 2 36 diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 4dc27692..84cee1ff 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -29,6 +29,7 @@ "count_change.ml": 406, "count_inversions.ml": 12, "csv.ml": 10, + "egg_drop.ml": 8, "dijkstra.ml": 7, "exception_handle.ml": 4, "exception_user.ml": 26, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index b607b3c5..c49c7565 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,14 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — egg_drop.ml baseline (worst-case trials + for 2 eggs, 36 floors = 8). Classic O(e·f²) DP: dp[e][f] = 1 + + min over k of max(dp[e-1][k-1], dp[e][f-k]). Closed form via + triangular numbers gives ⌈(√(1+8·36)−1)/2⌉ = 8, matching the + DP answer. Tests 2D DP with triple-nested for-loops, max-of-two + via inline if, large sentinel constant, mixed indexing (e-1) + and (f-k) where both shift independently. 163 baseline programs + total. - 2026-05-10 Phase 5.1 — polygon_area.ml baseline (shoelace formula on pentagon, returns 2× area = 32). Vertices (0,0), (4,0), (4,3), (2,5), (0,3); shoelace sum |Σ(x_i·y_{i+1} − x_{i+1}·y_i)| = 32 so From d1a4616ac423e9b8755b39e05e9713333bc54a49 Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 23:08:16 +0000 Subject: [PATCH 258/298] ocaml: phase 5.1 bipartite.ml baseline (7-node bipartite, 4 in color 0) BFS-based 2-coloring of an undirected graph: edges (undirected): 0-1 0-3 1-2 1-4 2-5 3-4 3-6 5-6 Partition: {0, 2, 4, 6} vs {1, 3, 5} (no odd cycles) Returns count of color-0 vertices (4) on success, -1 on odd-cycle detection. Tests Queue-based BFS with a source-loop wrapper for disconnected graphs, `1 - color.(u)` toggle, color-equality conflict check on already-colored neighbors. 164 baseline programs total. --- lib/ocaml/baseline/bipartite.ml | 39 ++++++++++++++++++++++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 9 ++++++++ 3 files changed, 49 insertions(+) create mode 100644 lib/ocaml/baseline/bipartite.ml diff --git a/lib/ocaml/baseline/bipartite.ml b/lib/ocaml/baseline/bipartite.ml new file mode 100644 index 00000000..cb027281 --- /dev/null +++ b/lib/ocaml/baseline/bipartite.ml @@ -0,0 +1,39 @@ +let is_bipartite n adj = + let color = Array.make n (-1) in + let ok = ref true in + let q = Queue.create () in + for src = 0 to n - 1 do + if color.(src) = -1 then begin + color.(src) <- 0; + Queue.push src q; + while not (Queue.is_empty q) do + let u = Queue.pop q in + List.iter (fun v -> + if color.(v) = -1 then begin + color.(v) <- 1 - color.(u); + Queue.push v q + end else if color.(v) = color.(u) then + ok := false + ) adj.(u) + done + end + done; + let zeros = ref 0 in + for i = 0 to n - 1 do + if color.(i) = 0 then zeros := !zeros + 1 + done; + if !ok then !zeros else -1 + +;; + +let n = 7 in +let adj = [| + [1; 3]; + [0; 2; 4]; + [1; 5]; + [0; 4; 6]; + [1; 3]; + [2; 6]; + [3; 5] +|] in +is_bipartite n adj diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 84cee1ff..7e246220 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -12,6 +12,7 @@ "bisect.ml": 141, "bigint_add.ml": 28, "binary_heap.ml": 123456789, + "bipartite.ml": 4, "bits.ml": 21, "balance.ml": 3, "base_n.ml": 17, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index c49c7565..e79e89ab 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,15 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — bipartite.ml baseline (BFS 2-coloring on + 7-node cycle-rich graph → bipartite with 4 vertices in color 0). + Edges: 0-1, 0-3, 1-2, 1-4, 2-5, 3-4, 3-6, 5-6. This forms a + bipartite graph with partition {0,2,4,6} vs {1,3,5}. Returns + count of color-0 vertices (4) on success, -1 on odd-cycle + detection. Tests Queue-based BFS with module-level state in a + loop over all sources (handles disconnected graphs), `1 - color.(u)` + binary toggle, color-equality conflict check. 164 baseline + programs total. - 2026-05-10 Phase 5.1 — egg_drop.ml baseline (worst-case trials for 2 eggs, 36 floors = 8). Classic O(e·f²) DP: dp[e][f] = 1 + min over k of max(dp[e-1][k-1], dp[e][f-k]). Closed form via From 689438d12e1f532b68e7402f1ce890ad6b257236 Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 23:18:26 +0000 Subject: [PATCH 259/298] ocaml: phase 5.1 matrix_power.ml baseline (F(30) = 832040 via 2x2 matrix pow) Fibonacci via repeated-squaring matrix exponentiation: [[1, 1], [1, 0]] ^ n = [[F(n+1), F(n)], [F(n), F(n-1)]] Recursive O(log n) power: let rec mpow m n = if n = 0 then identity else if n mod 2 = 0 then let h = mpow m (n / 2) in mul h h else mul m (mpow m (n - 1)) Returns the .b cell after raising to the 30th power -> 832040 = F(30). Tests record literal construction inside recursive function returns, record field access (x.a etc), and pure integer arithmetic in the matrix multiply. 165 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/matrix_power.ml | 20 ++++++++++++++++++++ plans/ocaml-on-sx.md | 8 ++++++++ 3 files changed, 29 insertions(+) create mode 100644 lib/ocaml/baseline/matrix_power.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 7e246220..bb4040eb 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -82,6 +82,7 @@ "list_ops.ml": 30, "luhn.ml": 2, "mat_mul.ml": 621, + "matrix_power.ml": 832040, "max_path_tree.ml": 11, "max_product3.ml": 300, "max_run.ml": 5, diff --git a/lib/ocaml/baseline/matrix_power.ml b/lib/ocaml/baseline/matrix_power.ml new file mode 100644 index 00000000..a6b630b3 --- /dev/null +++ b/lib/ocaml/baseline/matrix_power.ml @@ -0,0 +1,20 @@ +type m22 = { a : int; b : int; c : int; d : int } + +let mul x y = + { a = x.a * y.a + x.b * y.c; + b = x.a * y.b + x.b * y.d; + c = x.c * y.a + x.d * y.c; + d = x.c * y.b + x.d * y.d } + +let rec mpow m n = + if n = 0 then { a = 1; b = 0; c = 0; d = 1 } + else if n mod 2 = 0 then + let h = mpow m (n / 2) in mul h h + else + mul m (mpow m (n - 1)) + +;; + +let fib_matrix = { a = 1; b = 1; c = 1; d = 0 } in +let r = mpow fib_matrix 30 in +r.b diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index e79e89ab..1f01de16 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,14 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — matrix_power.ml baseline (Fibonacci via + 2×2 matrix fast exponentiation, F(30) = 832040). [[1,1],[1,0]]^n + has Fibonacci numbers in the top row; recursive O(log n) power + via repeated squaring on even n, multiply by base on odd n. + Records `{a; b; c; d}` standing in for matrix entries. F(30) = + 832040 matches the closed form. Tests record literal construction + inside recursive function returns, record field access (x.a etc), + arithmetic on integers (no float). 165 baseline programs total. - 2026-05-10 Phase 5.1 — bipartite.ml baseline (BFS 2-coloring on 7-node cycle-rich graph → bipartite with 4 vertices in color 0). Edges: 0-1, 0-3, 1-2, 1-4, 2-5, 3-4, 3-6, 5-6. This forms a From 3fe3b7b66fe0721eae9197f84e87354eb3f842a6 Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 23:28:29 +0000 Subject: [PATCH 260/298] ocaml: phase 5.1 magic_square.ml baseline (5x5 Siamese, diag sum = 65) Siamese construction for odd-order magic squares: - place 1 at (0, n/2) - for k = 2..n^2, move up-right with (x-1+n) mod n wrap - if the target cell is taken, drop down one row instead for n=5, magic constant = n*(n^2+1)/2 = 5*26/2 = 65 Returns the main-diagonal sum (65 by construction). Tests 2D array via Array.init + Array.make, mod arithmetic with the (x-1+n) mod n idiom for negative-safe wrap, nested begin/end branches inside for-loop body. 166 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/magic_square.ml | 27 +++++++++++++++++++++++++++ plans/ocaml-on-sx.md | 9 +++++++++ 3 files changed, 37 insertions(+) create mode 100644 lib/ocaml/baseline/magic_square.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index bb4040eb..c6511a5d 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -81,6 +81,7 @@ "lis.ml": 6, "list_ops.ml": 30, "luhn.ml": 2, + "magic_square.ml": 65, "mat_mul.ml": 621, "matrix_power.ml": 832040, "max_path_tree.ml": 11, diff --git a/lib/ocaml/baseline/magic_square.ml b/lib/ocaml/baseline/magic_square.ml new file mode 100644 index 00000000..f2b6ff5f --- /dev/null +++ b/lib/ocaml/baseline/magic_square.ml @@ -0,0 +1,27 @@ +let n = 5 + +let make_magic () = + let m = Array.init n (fun _ -> Array.make n 0) in + let row = ref 0 in + let col = ref (n / 2) in + for k = 1 to n * n do + m.(!row).(!col) <- k; + let nr = (!row - 1 + n) mod n in + let nc = (!col + 1) mod n in + if m.(nr).(nc) <> 0 then begin + row := (!row + 1) mod n + end else begin + row := nr; + col := nc + end + done; + m + +;; + +let m = make_magic () in +let sum_diag = ref 0 in +for i = 0 to n - 1 do + sum_diag := !sum_diag + m.(i).(i) +done; +!sum_diag diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 1f01de16..5b8a4a48 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,15 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — magic_square.ml baseline (5×5 Siamese + construction → main-diagonal sum 65, the magic constant for n=5). + Place k=1 at (0, n/2); for each next k, move up-right with wrap- + around; if that cell is taken, move down one row instead. + Magic constant for an n×n square is n(n²+1)/2 = 5·26/2 = 65. + Tests 2D array via Array.init + Array.make, mod arithmetic with + the `(x - 1 + n) mod n` idiom for negative-safe wrap, nested + begin/end branches inside for-loop body. 166 baseline programs + total. - 2026-05-10 Phase 5.1 — matrix_power.ml baseline (Fibonacci via 2×2 matrix fast exponentiation, F(30) = 832040). [[1,1],[1,0]]^n has Fibonacci numbers in the top row; recursive O(log n) power From 175a77fba586976d4919d89c4c0eee124ee75d65 Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 23:38:40 +0000 Subject: [PATCH 261/298] ocaml: phase 5.1 segment_tree.ml baseline (range-sum tree, fingerprint 4232) Power-of-two-indexed segment tree over [1;3;5;7;9;11;13;15]: build sums (root holds total = 64) query returns range sum in O(log n) update propagates a point delta back up the path Sequence: r1 = query [2,5] = 5 + 7 + 9 + 11 = 32 update idx 3 += 10 (so a[3] becomes 17) r2 = query [2,5] = 5 + 17 + 9 + 11 = 42 encoded fingerprint = r1 + r2*100 = 32 + 4200 = 4232 Tests three mutually independent recursive functions with array index arithmetic on 2*node / 2*node+1, half-bisection on mid = (l+r)/2, bottom-up combine pattern. 167 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/segment_tree.ml | 38 ++++++++++++++++++++++++++++++ plans/ocaml-on-sx.md | 9 +++++++ 3 files changed, 48 insertions(+) create mode 100644 lib/ocaml/baseline/segment_tree.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index c6511a5d..f028eba3 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -141,6 +141,7 @@ "run_decode.ml": 21, "run_length.ml": 11, "safe_div.ml": 20, + "segment_tree.ml": 4232, "shuffle.ml": 55, "simpson_int.ml": 10000, "stable_unique.ml": 46, diff --git a/lib/ocaml/baseline/segment_tree.ml b/lib/ocaml/baseline/segment_tree.ml new file mode 100644 index 00000000..d878f996 --- /dev/null +++ b/lib/ocaml/baseline/segment_tree.ml @@ -0,0 +1,38 @@ +let n = 8 + +let st = Array.make (4 * n) 0 + +let rec build a l r node = + if l = r then st.(node) <- a.(l) + else begin + let mid = (l + r) / 2 in + build a l mid (2 * node); + build a (mid + 1) r (2 * node + 1); + st.(node) <- st.(2 * node) + st.(2 * node + 1) + end + +let rec query l r ql qr node = + if qr < l || ql > r then 0 + else if ql <= l && r <= qr then st.(node) + else begin + let mid = (l + r) / 2 in + query l mid ql qr (2 * node) + query (mid + 1) r ql qr (2 * node + 1) + end + +let rec update l r idx delta node = + if l = r then st.(node) <- st.(node) + delta + else begin + let mid = (l + r) / 2 in + if idx <= mid then update l mid idx delta (2 * node) + else update (mid + 1) r idx delta (2 * node + 1); + st.(node) <- st.(2 * node) + st.(2 * node + 1) + end + +;; + +let a = [| 1; 3; 5; 7; 9; 11; 13; 15 |] in +build a 0 (n - 1) 1; +let r1 = query 0 (n - 1) 2 5 1 in +update 0 (n - 1) 3 10 1; +let r2 = query 0 (n - 1) 2 5 1 in +r1 + r2 * 100 diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 5b8a4a48..930aadcd 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,15 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — segment_tree.ml baseline (range-sum + segment tree over [1;3;5;7;9;11;13;15] with one point update, + encoded r1 + r2*100 = 4232). build/query/update use the standard + power-of-two indexing (node, 2·node, 2·node+1) over a flat + 4n-sized array. First range query 5+7+9+11=32; after adding 10 + to index 3, the same range = 5+17+9+11=42. Encoded: 32 + 42*100 + = 4232. Tests three mutually independent recursive functions + with array index arithmetic, half-bisection on `mid = (l + r) / 2`, + bottom-up combine pattern. 167 baseline programs total. - 2026-05-10 Phase 5.1 — magic_square.ml baseline (5×5 Siamese construction → main-diagonal sum 65, the magic constant for n=5). Place k=1 at (0, n/2); for each next k, move up-right with wrap- From cb626fc402d751e96703722942844f53f180906d Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 23:48:46 +0000 Subject: [PATCH 262/298] ocaml: phase 5.1 fenwick_tree.ml baseline (BIT over 8 elements, fingerprint 228) Fenwick / Binary Indexed Tree for prefix sums. The classic `i & -i` low-bit trick needs negative-aware AND, but our `land` evaluator (iter 127, bitwise via floor/mod arithmetic) only handles non-negative operands. Workaround: a portable lowbit helper that finds the largest power of 2 dividing i: let lowbit i = let r = ref 1 in while !r * 2 <= i && i mod (!r * 2) = 0 do r := !r * 2 done; !r After building from [1;3;5;7;9;11;13;15]: total = prefix_sum 8 = 64 update 1 by +100 after = prefix_sum 8 = 164 total + after = 228 Tests recursive update / prefix_sum chains via helper-extracted lowbit; documents a non-obvious limit of the bitwise-emulation layer. 168 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/fenwick_tree.ml | 33 ++++++++++++++++++++++++++++++ plans/ocaml-on-sx.md | 11 ++++++++++ 3 files changed, 45 insertions(+) create mode 100644 lib/ocaml/baseline/fenwick_tree.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index f028eba3..864e52a3 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -56,6 +56,7 @@ "expr_eval.ml": 16, "expr_simp.ml": 22, "factorial.ml": 3628800, + "fenwick_tree.ml": 228, "fib_doubling.ml": 102334155, "fib_mod.ml": 391360, "fraction.ml": 7, diff --git a/lib/ocaml/baseline/fenwick_tree.ml b/lib/ocaml/baseline/fenwick_tree.ml new file mode 100644 index 00000000..8b4a2aa7 --- /dev/null +++ b/lib/ocaml/baseline/fenwick_tree.ml @@ -0,0 +1,33 @@ +let n = 8 + +let bit = Array.make (n + 1) 0 + +let lowbit i = + let r = ref 1 in + while !r * 2 <= i && i mod (!r * 2) = 0 do + r := !r * 2 + done; + !r + +let rec update i delta = + if i <= n then begin + bit.(i) <- bit.(i) + delta; + update (i + lowbit i) delta + end + +let rec prefix_sum i = + if i <= 0 then 0 + else bit.(i) + prefix_sum (i - lowbit i) + +let range_sum l r = prefix_sum r - prefix_sum (l - 1) + +;; + +let a = [| 1; 3; 5; 7; 9; 11; 13; 15 |] in +for i = 0 to n - 1 do + update (i + 1) a.(i) +done; +let total = prefix_sum n in +update 1 100; +let after = prefix_sum n in +total + after diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 930aadcd..2b0d06f3 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,17 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — fenwick_tree.ml baseline (Binary Indexed + Tree over [1;3;5;7;9;11;13;15], total + after = 228). Initial + prefix_sum n = 64; after +100 at index 1, prefix_sum n = 164; + 64 + 164 = 228. Because our `land` evaluator implementation only + handles non-negative operands (iter 127 bitwise-via-arithmetic + workaround), and Fenwick relies on `i & -i` to extract the + lowest set bit, we replace `i land (-i)` with a portable + `lowbit` helper: largest power-of-2 dividing i. Tests recursive + update / prefix_sum chains using helper-extracted lowbit, + highlights a non-obvious tradeoff in the bitwise-emulation + layer. 168 baseline programs total. - 2026-05-10 Phase 5.1 — segment_tree.ml baseline (range-sum segment tree over [1;3;5;7;9;11;13;15] with one point update, encoded r1 + r2*100 = 4232). build/query/update use the standard From ca34cede8864d235ed6394d36c6454b7d297a652 Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 23:58:50 +0000 Subject: [PATCH 263/298] ocaml: phase 5.1 next_greater.ml baseline (monotonic stack, sum 153) Right-to-left monotonic stack for next-greater-element: for i = n - 1 downto 0 do while (match !stack with [] -> false | h :: _ -> h <= arr.(i)) do stack := List.tl !stack done; (match !stack with | [] -> res.(i) <- -1 | h :: _ -> res.(i) <- h); stack := arr.(i) :: !stack done For [4; 5; 2; 25; 7; 8; 1; 30; 12]: results: [5; 25; 25; 30; 8; 30; 30; -1; -1] sum of non-negative = 5+25+25+30+8+30+30 = 153 Tests stack as ref list with match-driven peek, match-as-bool in while-guard, inline parenthesized match driving <-. 169 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/next_greater.ml | 23 +++++++++++++++++++++++ plans/ocaml-on-sx.md | 9 +++++++++ 3 files changed, 33 insertions(+) create mode 100644 lib/ocaml/baseline/next_greater.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 864e52a3..d06b991b 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -110,6 +110,7 @@ "module_use.ml": 3, "monotonic.ml": 4, "newton_sqrt.ml": 1414, + "next_greater.ml": 153, "next_permutation.ml": 119, "number_words.ml": 106, "mutable_record.ml": 10, diff --git a/lib/ocaml/baseline/next_greater.ml b/lib/ocaml/baseline/next_greater.ml new file mode 100644 index 00000000..9c09fb3e --- /dev/null +++ b/lib/ocaml/baseline/next_greater.ml @@ -0,0 +1,23 @@ +let next_greater arr = + let n = Array.length arr in + let res = Array.make n (-1) in + let stack = ref [] in + for i = n - 1 downto 0 do + while (match !stack with [] -> false | h :: _ -> h <= arr.(i)) do + stack := List.tl !stack + done; + (match !stack with + | [] -> res.(i) <- -1 + | h :: _ -> res.(i) <- h); + stack := arr.(i) :: !stack + done; + res + +;; + +let r = next_greater [| 4; 5; 2; 25; 7; 8; 1; 30; 12 |] in +let sum = ref 0 in +for i = 0 to Array.length r - 1 do + if r.(i) >= 0 then sum := !sum + r.(i) +done; +!sum diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 2b0d06f3..798a952a 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,15 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — next_greater.ml baseline (monotonic stack + for next-greater-element over [4;5;2;25;7;8;1;30;12], sum of + successors = 153). Right-to-left scan with a stack that pops + elements ≤ current. Result: [5,25,25,30,8,30,30,-1,-1]; sum of + non-negative entries = 5+25+25+30+8+30+30 = 153. Tests stack + encoded as `ref list` with match-driven peek, `match … with [] + -> false | h :: _ -> …` boolean coercion in `while` guard, + inline match-statement on rhs of `<-` (paren-wrapped for parse). + 169 baseline programs total. - 2026-05-10 Phase 5.1 — fenwick_tree.ml baseline (Binary Indexed Tree over [1;3;5;7;9;11;13;15], total + after = 228). Initial prefix_sum n = 64; after +100 at index 1, prefix_sum n = 164; From 1d1c35a438522359f9580e383246cb90010346e7 Mon Sep 17 00:00:00 2001 From: giles Date: Mon, 11 May 2026 00:09:06 +0000 Subject: [PATCH 264/298] ocaml: phase 5.1 convex_hull.ml baseline (Andrew monotone chain, 5 vertices) Andrew's monotone chain hull over 8 integer points: pts = [(0,0); (1,1); (2,0); (2,2); (0,2); (1,0); (3,3); (5,1)] Sort lex, build lower hull L->R then upper R->L, popping while the cross product is non-positive (collinear included on hull). Hull traverse: (0,0) -> (2,0) -> (5,1) -> (3,3) -> (0,2) = 5 ((2,0) lies on the lower edge from (0,0) to (5,1)). Tests List.sort with 2-tuple comparator using nested pair destructure, repeated `let (x, y) = arr.(i) in` array tuple destructure across both passes, while + cont-flag pattern. 170 baseline programs total. --- lib/ocaml/baseline/convex_hull.ml | 43 +++++++++++++++++++++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 11 ++++++++ 3 files changed, 55 insertions(+) create mode 100644 lib/ocaml/baseline/convex_hull.ml diff --git a/lib/ocaml/baseline/convex_hull.ml b/lib/ocaml/baseline/convex_hull.ml new file mode 100644 index 00000000..575c0387 --- /dev/null +++ b/lib/ocaml/baseline/convex_hull.ml @@ -0,0 +1,43 @@ +let cross ox oy ax ay bx by = + (ax - ox) * (by - oy) - (ay - oy) * (bx - ox) + +let hull_size pts = + let n = List.length pts in + if n < 3 then n + else begin + let sorted = List.sort (fun (a, b) (c, d) -> + if a <> c then compare a c else compare b d) pts in + let arr = Array.of_list sorted in + let h = Array.make (2 * n) (0, 0) in + let k = ref 0 in + for i = 0 to n - 1 do + let (xi, yi) = arr.(i) in + let cont = ref true in + while !cont && !k >= 2 do + let (ox, oy) = h.(!k - 2) in + let (ax, ay) = h.(!k - 1) in + if cross ox oy ax ay xi yi <= 0 then k := !k - 1 + else cont := false + done; + h.(!k) <- (xi, yi); + k := !k + 1 + done; + let lo = !k + 1 in + for i = n - 2 downto 0 do + let (xi, yi) = arr.(i) in + let cont = ref true in + while !cont && !k >= lo do + let (ox, oy) = h.(!k - 2) in + let (ax, ay) = h.(!k - 1) in + if cross ox oy ax ay xi yi <= 0 then k := !k - 1 + else cont := false + done; + h.(!k) <- (xi, yi); + k := !k + 1 + done; + !k - 1 + end + +;; + +hull_size [(0, 0); (1, 1); (2, 0); (2, 2); (0, 2); (1, 0); (3, 3); (5, 1)] diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index d06b991b..8f4df2a9 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -25,6 +25,7 @@ "calc.ml": 13, "catalan.ml": 42, "closures.ml": 315, + "convex_hull.ml": 5, "coin_change.ml": 6, "coin_min.ml": 6, "count_change.ml": 406, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 798a952a..292124dc 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,17 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-11 Phase 5.1 — convex_hull.ml baseline (Andrew's monotone + chain over 8 points → 5-vertex convex hull). Sorts points + lexicographically, then builds lower hull left-to-right and upper + hull right-to-left, popping back when the cross product turns the + wrong way. Points: (0,0), (1,1), (2,0), (2,2), (0,2), (1,0), + (3,3), (5,1). Hull = (0,0) → (2,0) → (5,1) → (3,3) → (0,2) = 5 + vertices ((2,0) lies on lower edge, included by the ≤ test). + Tests List.sort with a 2-tuple comparator using nested pair + destructure, repeated `let (x, y) = arr.(i) in` array tuple + destructure across both passes, while + cont-flag pattern instead + of break. 170 baseline programs total. - 2026-05-10 Phase 5.1 — next_greater.ml baseline (monotonic stack for next-greater-element over [4;5;2;25;7;8;1;30;12], sum of successors = 153). Right-to-left scan with a stack that pops From fd1f94f292a41fd8c775c4a4225be25957d46894 Mon Sep 17 00:00:00 2001 From: giles Date: Mon, 11 May 2026 00:20:30 +0000 Subject: [PATCH 265/298] ocaml: phase 5.1 lru_cache.ml baseline (cap=3 LRU, fingerprint 499) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Functional LRU cache via association-list ordered most-recent-first. Get / put both: - find or remove the existing entry - cons the fresh (k, v) to the front - on put, trim the tail when over capacity Sequence: put 1 100; put 2 200; put 3 300 a = get 1 -> 100 (moves 1 to front) put 4 400 (evicts 2) b = get 2 -> -1 (no longer cached) c = get 3 -> 300 d = get 1 -> 100 a + b + c + d = 499 Tests `match … with (k', v) :: rest when k' = k -> …` tuple-cons patterns with `when` guards, `function` keyword for arg-less match, recursive find/remove/take over the same list. Parser limit found: `match n, lst with` ad-hoc tuple-scrutinee is not yet supported (got "expected op -> got op ,"); workaround uses outer `if` plus inner match. 171 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/lru_cache.ml | 51 ++++++++++++++++++++++++++++++++ plans/ocaml-on-sx.md | 11 +++++++ 3 files changed, 63 insertions(+) create mode 100644 lib/ocaml/baseline/lru_cache.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 8f4df2a9..90b74c93 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -82,6 +82,7 @@ "floyd_warshall.ml": 9, "lis.ml": 6, "list_ops.ml": 30, + "lru_cache.ml": 499, "luhn.ml": 2, "magic_square.ml": 65, "mat_mul.ml": 621, diff --git a/lib/ocaml/baseline/lru_cache.ml b/lib/ocaml/baseline/lru_cache.ml new file mode 100644 index 00000000..9153db10 --- /dev/null +++ b/lib/ocaml/baseline/lru_cache.ml @@ -0,0 +1,51 @@ +let cache = ref [] +let cap = 3 + +let get k = + let rec find = function + | [] -> None + | (k', v) :: _ when k' = k -> Some v + | _ :: rest -> find rest + in + match find !cache with + | None -> -1 + | Some v -> + let rec remove = function + | [] -> [] + | (k', _) :: rest when k' = k -> rest + | h :: rest -> h :: remove rest + in + cache := (k, v) :: remove !cache; + v + +let put k v = + let rec remove = function + | [] -> [] + | (k', _) :: rest when k' = k -> rest + | h :: rest -> h :: remove rest + in + let cleaned = remove !cache in + let trimmed = + if List.length cleaned >= cap then + let rec take n lst = + if n = 0 then [] + else match lst with + | [] -> [] + | h :: r -> h :: take (n - 1) r + in + take (cap - 1) cleaned + else cleaned + in + cache := (k, v) :: trimmed + +;; + +put 1 100; +put 2 200; +put 3 300; +let a = get 1 in +put 4 400; +let b = get 2 in +let c = get 3 in +let d = get 1 in +a + b + c + d diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 292124dc..5f209ede 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,17 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-11 Phase 5.1 — lru_cache.ml baseline (list-backed LRU + cache of capacity 3, fingerprint 100 + (-1) + 300 + 100 = 499). + Each `get` / `put` removes the existing entry then conses the + fresh one to the front; `put` evicts the tail when over capacity. + Tests `match … with [] -> … | (k', v) :: rest when k' = k -> …` + pattern matching with `when` guards over tuple-cons patterns, + `function` keyword for short scrutinee-free matches, recursive + `find` / `remove` / `take` over the same list. Note: parser does + not yet handle `match n, lst with` ad-hoc tuple-scrutinee (got + "expected op -> got op ,"); workaround uses an outer `if` then + inner `match lst with`. 171 baseline programs total. - 2026-05-11 Phase 5.1 — convex_hull.ml baseline (Andrew's monotone chain over 8 points → 5-vertex convex hull). Sorts points lexicographically, then builds lower hull left-to-right and upper From 800dca67cab52b26281026100a9d83259db70b0d Mon Sep 17 00:00:00 2001 From: giles Date: Mon, 11 May 2026 00:31:08 +0000 Subject: [PATCH 266/298] ocaml: parser accepts top-level tuple patterns in match cases MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Real OCaml accepts `match e1, e2 with | p1, p2 -> …` without surrounding parens. parse-pattern previously stopped at the cons layer (`p :: rest`) and treated a trailing `,` as a separator the outer caller couldn't handle, surfacing as "expected op -> got op ,". Fix: `parse-pattern` now collects comma-separated patterns into a :ptuple after parse-pattern-cons, before the optional `as` alias. The scrutinee side already built tuples via parse-tuple, so both sides are now symmetric. lru_cache.ml (iter 258) reverts its workaround back to the natural form: let rec take n lst = match n, lst with | 0, _ -> [] | _, [] -> [] | _, h :: r -> h :: take (n - 1) r 607/607 regressions clean. --- lib/ocaml/baseline/lru_cache.ml | 9 ++++----- lib/ocaml/parser.sx | 30 +++++++++++++++++++++++++++--- plans/ocaml-on-sx.md | 14 ++++++++++---- 3 files changed, 41 insertions(+), 12 deletions(-) diff --git a/lib/ocaml/baseline/lru_cache.ml b/lib/ocaml/baseline/lru_cache.ml index 9153db10..51fb6533 100644 --- a/lib/ocaml/baseline/lru_cache.ml +++ b/lib/ocaml/baseline/lru_cache.ml @@ -27,11 +27,10 @@ let put k v = let cleaned = remove !cache in let trimmed = if List.length cleaned >= cap then - let rec take n lst = - if n = 0 then [] - else match lst with - | [] -> [] - | h :: r -> h :: take (n - 1) r + let rec take n lst = match n, lst with + | 0, _ -> [] + | _, [] -> [] + | _, h :: r -> h :: take (n - 1) r in take (cap - 1) cleaned else cleaned diff --git a/lib/ocaml/parser.sx b/lib/ocaml/parser.sx index 941507ea..25d795c1 100644 --- a/lib/ocaml/parser.sx +++ b/lib/ocaml/parser.sx @@ -328,14 +328,38 @@ ;; use `(A | B)` if needed in the future via a parens-only or. (set! parse-pattern (fn () - (let ((p (parse-pattern-cons))) + ;; Top-level pattern: cons-pat, optionally followed by + ;; comma-separated patterns for ad-hoc tuple matching + ;; (`match e1, e2 with | p1, p2 -> …`), optionally aliased + ;; with `as name`. + (let ((first (parse-pattern-cons))) (cond + ((at-op? ",") + (let ((items (list first))) + (begin + (define loop-comma + (fn () + (when (at-op? ",") + (begin + (advance-tok!) + (append! items (parse-pattern-cons)) + (loop-comma))))) + (loop-comma) + (let ((p (cons :ptuple items))) + (cond + ((at-kw? "as") + (begin + (advance-tok!) + (let ((n (ocaml-tok-value + (consume! "ident" nil)))) + (list :pas p n)))) + (else p)))))) ((at-kw? "as") (begin (advance-tok!) (let ((n (ocaml-tok-value (consume! "ident" nil)))) - (list :pas p n)))) - (else p))))) + (list :pas first n)))) + (else first))))) (define peek-tok-at (fn (n) diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 5f209ede..24604c06 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,14 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-11 Phase 5.1 — parser: top-level tuple patterns + (`match e1, e2 with | p1, p2 -> …`). `parse-pattern` now collects + comma-separated patterns into a `:ptuple`, mirroring the scrutinee + side which already builds tuples via `parse-tuple`. Real OCaml + accepts this ad-hoc tuple shorthand without surrounding parens. + lru_cache.ml reverts the iter-258 workaround back to the natural + `match n, lst with | 0, _ -> [] | _, [] -> [] | _, h :: r -> …` + form. 607/607 regressions clean. - 2026-05-11 Phase 5.1 — lru_cache.ml baseline (list-backed LRU cache of capacity 3, fingerprint 100 + (-1) + 300 + 100 = 499). Each `get` / `put` removes the existing entry then conses the @@ -414,10 +422,8 @@ _Newest first._ Tests `match … with [] -> … | (k', v) :: rest when k' = k -> …` pattern matching with `when` guards over tuple-cons patterns, `function` keyword for short scrutinee-free matches, recursive - `find` / `remove` / `take` over the same list. Note: parser does - not yet handle `match n, lst with` ad-hoc tuple-scrutinee (got - "expected op -> got op ,"); workaround uses an outer `if` then - inner `match lst with`. 171 baseline programs total. + `find` / `remove` / `take` over the same list. 171 baseline + programs total. - 2026-05-11 Phase 5.1 — convex_hull.ml baseline (Andrew's monotone chain over 8 points → 5-vertex convex hull). Sorts points lexicographically, then builds lower hull left-to-right and upper From 0a3f02d63601d284af668804122bbbf0ef661456 Mon Sep 17 00:00:00 2001 From: giles Date: Mon, 11 May 2026 00:42:08 +0000 Subject: [PATCH 267/298] ocaml: phase 5.1 powerset_target.ml baseline (subsets of {1..10} summing to 15 = 20) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Recursive powerset construction by doubling: let rec gen xs = match xs with | [] -> [[]] | h :: rest -> let sub = gen rest in sub @ List.map (fun s -> h :: s) sub Enumerates all 2^10 = 1024 subsets, filters by sum: count = |{ S ⊆ {1..10} | Σ S = 15 }| = 20 Examples: {1,2,3,4,5}, {2,3,4,6}, {1,4,10}, {7,8}, {6,9}, ... Tests recursive subset construction via List.map + closures, pattern matching with h :: rest, List.fold_left (+) 0 sum reduce, exhaustive O(2^n * n) traversal. 172 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/powerset_target.ml | 16 ++++++++++++++++ plans/ocaml-on-sx.md | 9 +++++++++ 3 files changed, 26 insertions(+) create mode 100644 lib/ocaml/baseline/powerset_target.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 90b74c93..0449c025 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -132,6 +132,7 @@ "poly_stack.ml": 5, "polygon_area.ml": 32, "pow_mod.ml": 738639, + "powerset_target.ml": 20, "prime_factors.ml": 17, "pythagorean.ml": 16, "queens.ml": 2, diff --git a/lib/ocaml/baseline/powerset_target.ml b/lib/ocaml/baseline/powerset_target.ml new file mode 100644 index 00000000..6d7d2895 --- /dev/null +++ b/lib/ocaml/baseline/powerset_target.ml @@ -0,0 +1,16 @@ +let rec gen xs = + match xs with + | [] -> [[]] + | h :: rest -> + let sub = gen rest in + sub @ List.map (fun s -> h :: s) sub + +;; + +let all = gen [1; 2; 3; 4; 5; 6; 7; 8; 9; 10] in +let count = ref 0 in +List.iter (fun sub -> + let sum = List.fold_left (+) 0 sub in + if sum = 15 then count := !count + 1 +) all; +!count diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 24604c06..c7be4450 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,15 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-11 Phase 5.1 — powerset_target.ml baseline (count subsets + of {1..10} summing to 15 = 20). Generates the full 2^10 = 1024 + powerset by recursive doubling: `gen (h :: rest) = gen rest @ + map (cons h) (gen rest)`, then filters by sum. The 20 valid + subsets include {1,2,3,4,5}, {2,3,4,6}, {1,4,10}, {7,8}, {6,9}, + etc. Tests recursive subset construction via List.map + closures, + pattern matching with `h :: rest`, `List.fold_left (+) 0` + one-line summary, exhaustive O(2^n · n) traversal. 172 baseline + programs total. - 2026-05-11 Phase 5.1 — parser: top-level tuple patterns (`match e1, e2 with | p1, p2 -> …`). `parse-pattern` now collects comma-separated patterns into a `:ptuple`, mirroring the scrutinee From 6c77dec495f7deaafb4f23ef8031ff0275d195b4 Mon Sep 17 00:00:00 2001 From: giles Date: Mon, 11 May 2026 00:52:42 +0000 Subject: [PATCH 268/298] ocaml: phase 5.1 wildcard_match.ml baseline (* / ? matcher, 6/18 match) Recursive wildcard matcher: let rec is_match s i p j = if j = String.length p then i = String.length s else if p.[j] = '*' then is_match s i p (j + 1) (* * matches empty *) || (i < String.length s && is_match s (i + 1) p j) (* * eats char *) else i < String.length s && (p.[j] = '?' || p.[j] = s.[i]) && is_match s (i + 1) p (j + 1) Patterns vs texts: a*b | aaab abc abxyz xy xyz axby -> 1 match ?b*c | aaab abc abxyz xy xyz axby -> 1 match *x*y* | aaab abc abxyz xy xyz axby -> 4 matches total = 6 Tests deeply nested short-circuit && / ||, char equality on pattern bytes, doubly-nested List.iter cross product. 173 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/wildcard_match.ml | 24 ++++++++++++++++++++++++ plans/ocaml-on-sx.md | 11 +++++++++++ 3 files changed, 36 insertions(+) create mode 100644 lib/ocaml/baseline/wildcard_match.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 0449c025..1980bab3 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -157,6 +157,7 @@ "tic_tac_toe.ml": 1, "topo_dfs.ml": 24135, "topo_sort.ml": 6, + "wildcard_match.ml": 6, "word_freq.ml": 8, "xor_cipher.ml": 601, "zerosafe.ml": 28, diff --git a/lib/ocaml/baseline/wildcard_match.ml b/lib/ocaml/baseline/wildcard_match.ml new file mode 100644 index 00000000..3ee39e6b --- /dev/null +++ b/lib/ocaml/baseline/wildcard_match.ml @@ -0,0 +1,24 @@ +let rec is_match s i p j = + if j = String.length p then i = String.length s + else if p.[j] = '*' then + is_match s i p (j + 1) + || (i < String.length s && is_match s (i + 1) p j) + else + i < String.length s + && (p.[j] = '?' || p.[j] = s.[i]) + && is_match s (i + 1) p (j + 1) + +let count_match patterns texts = + let count = ref 0 in + List.iter (fun p -> + List.iter (fun t -> + if is_match t 0 p 0 then count := !count + 1 + ) texts + ) patterns; + !count + +;; + +let patterns = ["a*b"; "?b*c"; "*x*y*"] in +let texts = ["aaab"; "abc"; "abxyz"; "xy"; "xyz"; "axby"] in +count_match patterns texts diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index c7be4450..ffe73ce7 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,17 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-11 Phase 5.1 — wildcard_match.ml baseline (recursive `*` + /`?` wildcard matcher; over 3×6 = 18 (pattern, text) combos, + 6 match). Cases: + a*b — 1 match (aaab) + ?b*c — 1 match (abc) + *x*y* — 4 matches (abxyz, xy, xyz, axby) + is_match uses `||` for the `*` alternation (consume zero or one + more char and recurse) and `&&` for the literal/`?` case. + Tests deeply nested short-circuit, char equality in pattern + predicate, doubly-nested `List.iter` to enumerate the cross + product. 173 baseline programs total. - 2026-05-11 Phase 5.1 — powerset_target.ml baseline (count subsets of {1..10} summing to 15 = 20). Generates the full 2^10 = 1024 powerset by recursive doubling: `gen (h :: rest) = gen rest @ From b771ea306c0211514710db7fb1f875f6e9e5ac9f Mon Sep 17 00:00:00 2001 From: giles Date: Mon, 11 May 2026 01:02:59 +0000 Subject: [PATCH 269/298] ocaml: phase 5.1 bracket_match.ml baseline (5/9 balanced strings) Stack-based multi-bracket parenthesis matching for ( [ { ) ] }. Non-bracket chars are skipped (treated as content). Tests: () yes [{()}] yes ({[}]) no (mismatched closer) "" yes (( no (unclosed) ()[](){} yes (a(b)c) yes (a/b/c skipped) (() no ]) no 5 balanced Body uses begin/end-wrapped match inside while: else if c = ')' || c = ']' || c = '}' then begin match !stack with | [] -> ok := false | top :: rest -> let pair = (c = ')' && top = '(') || (c = ']' && top = '[') || (c = '}' && top = '{') in if pair then stack := rest else ok := false end Tests side-effecting match arms inside while body, ref-of-list as stack, multi-char pairing dispatch. 174 baseline programs total. --- lib/ocaml/baseline/bracket_match.ml | 32 +++++++++++++++++++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 9 ++++++++ 3 files changed, 42 insertions(+) create mode 100644 lib/ocaml/baseline/bracket_match.ml diff --git a/lib/ocaml/baseline/bracket_match.ml b/lib/ocaml/baseline/bracket_match.ml new file mode 100644 index 00000000..6e77e74e --- /dev/null +++ b/lib/ocaml/baseline/bracket_match.ml @@ -0,0 +1,32 @@ +let bracket_match s = + let n = String.length s in + let stack = ref [] in + let ok = ref true in + let i = ref 0 in + while !ok && !i < n do + let c = s.[!i] in + if c = '(' || c = '[' || c = '{' then + stack := c :: !stack + else if c = ')' || c = ']' || c = '}' then begin + match !stack with + | [] -> ok := false + | top :: rest -> + let pair = + (c = ')' && top = '(') || + (c = ']' && top = '[') || + (c = '}' && top = '{') + in + if pair then stack := rest else ok := false + end; + i := !i + 1 + done; + if !ok && !stack = [] then 1 else 0 + +;; + +let strings = ["()"; "[{()}]"; "({[}])"; ""; "(("; "()[](){}"; "(a(b)c)"; "(()"; "])"] in +let count = ref 0 in +List.iter (fun s -> + count := !count + bracket_match s +) strings; +!count diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 1980bab3..bf25a668 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -15,6 +15,7 @@ "bipartite.ml": 4, "bits.ml": 21, "balance.ml": 3, + "bracket_match.ml": 5, "base_n.ml": 17, "bfs.ml": 6, "bfs_grid.ml": 8, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index ffe73ce7..ffadc782 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,15 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-11 Phase 5.1 — bracket_match.ml baseline (multi-bracket + parenthesis matching over 9 test strings, 5 balanced). Stack- + based scan: push openers `( [ {`, pop and pair-check closers. + Sequence: `() [{()}] ({[}]) "" (( ()[](){} (a(b)c) (() ])` → + yes/yes/no/yes/no/yes/yes/no/no = 5. + Tests `begin … match !stack with [] -> … | top :: rest -> … end` + with side-effecting match arms inside a `while` body, ref-of-list + used as a stack, three-way char dispatch via short-circuited + comparisons. 174 baseline programs total. - 2026-05-11 Phase 5.1 — wildcard_match.ml baseline (recursive `*` /`?` wildcard matcher; over 3×6 = 18 (pattern, text) combos, 6 match). Cases: From 6780acd0afda760bb05dd3b612fda398e7347779 Mon Sep 17 00:00:00 2001 From: giles Date: Mon, 11 May 2026 01:12:33 +0000 Subject: [PATCH 270/298] ocaml: phase 5.1 distinct_subseq.ml baseline ("rabbit" in "rabbbit" = 3) Classic distinct-subsequences 2D DP: dp[i][j] = dp[i-1][j] + (s[i-1] = t[j-1] ? dp[i-1][j-1] : 0) dp[i][0] = 1 (empty t is a subseq of any prefix of s) count_subseq "rabbbit" "rabbit" = 3 The three witnesses correspond to which 'b' in "rabbbit" is dropped (positions 2, 3, or 4 zero-indexed of the run of bs). Complements subseq_check.ml (just tests presence); this one counts distinct embeddings. Tests 2D DP with Array.init n (fun _ -> Array.make m 0), base row initialization, mixed string + array indexing. 175 baseline programs total. --- lib/ocaml/baseline/distinct_subseq.ml | 20 ++++++++++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 9 +++++++++ 3 files changed, 30 insertions(+) create mode 100644 lib/ocaml/baseline/distinct_subseq.ml diff --git a/lib/ocaml/baseline/distinct_subseq.ml b/lib/ocaml/baseline/distinct_subseq.ml new file mode 100644 index 00000000..f5231c45 --- /dev/null +++ b/lib/ocaml/baseline/distinct_subseq.ml @@ -0,0 +1,20 @@ +let count_subseq s t = + let m = String.length s in + let n = String.length t in + let dp = Array.init (m + 1) (fun _ -> Array.make (n + 1) 0) in + for i = 0 to m do + dp.(i).(0) <- 1 + done; + for i = 1 to m do + for j = 1 to n do + if s.[i - 1] = t.[j - 1] then + dp.(i).(j) <- dp.(i - 1).(j) + dp.(i - 1).(j - 1) + else + dp.(i).(j) <- dp.(i - 1).(j) + done + done; + dp.(m).(n) + +;; + +count_subseq "rabbbit" "rabbit" diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index bf25a668..e5b67380 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -34,6 +34,7 @@ "csv.ml": 10, "egg_drop.ml": 8, "dijkstra.ml": 7, + "distinct_subseq.ml": 3, "exception_handle.ml": 4, "exception_user.ml": 26, "euler1.ml": 233168, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index ffadc782..eaeecb98 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,15 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-11 Phase 5.1 — distinct_subseq.ml baseline (count + distinct subsequences of "rabbbit" that equal "rabbit" = 3). + Classic 2D DP: dp[i][j] = dp[i-1][j] + (s[i-1]=t[j-1] ? dp[i-1][j-1] + : 0). Three witnesses correspond to which `b` in "rabbbit" is + dropped: positions 2, 3, or 4 (0-indexed of the three b's). + Complements existing subseq_check.ml which just tests presence. + Tests 2D DP with `Array.init … (fun _ -> Array.make …)`, edge- + case base row dp[i][0]=1 (empty t is a subseq of any prefix). + 175 baseline programs total. - 2026-05-11 Phase 5.1 — bracket_match.ml baseline (multi-bracket parenthesis matching over 9 test strings, 5 balanced). Stack- based scan: push openers `( [ {`, pop and pair-check closers. From 7f310a4da7e8d96e2091181b48f3c41b9dc3803d Mon Sep 17 00:00:00 2001 From: giles Date: Mon, 11 May 2026 01:22:31 +0000 Subject: [PATCH 271/298] ocaml: phase 5.1 bs_bounds.ml baseline (lower/upper bound, fingerprint 3211) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit C++-style lower_bound / upper_bound on a sorted array: lower_bound — first index >= x (loop uses arr.(mid) < x) upper_bound — first index > x (loop uses arr.(mid) <= x) a = [|1; 2; 2; 3; 3; 3; 5; 7; 9|] upper(x) - lower(x) gives the count of x in a: cnt3 = 3 cnt2 = 2 cnt5 = 1 cnt9 = 1 cnt4 = 0 fingerprint = 3*1000 + 2*100 + 1*10 + 1 + 0 = 3211 Tests parallel while loops with bisection on ref, mixed strict and non-strict comparison branches, count-via-subtraction idiom. 176 baseline programs total. --- lib/ocaml/baseline/bs_bounds.ml | 27 +++++++++++++++++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 9 +++++++++ 3 files changed, 37 insertions(+) create mode 100644 lib/ocaml/baseline/bs_bounds.ml diff --git a/lib/ocaml/baseline/bs_bounds.ml b/lib/ocaml/baseline/bs_bounds.ml new file mode 100644 index 00000000..a1885344 --- /dev/null +++ b/lib/ocaml/baseline/bs_bounds.ml @@ -0,0 +1,27 @@ +let lower_bound arr x = + let lo = ref 0 and hi = ref (Array.length arr) in + while !lo < !hi do + let mid = (!lo + !hi) / 2 in + if arr.(mid) < x then lo := mid + 1 + else hi := mid + done; + !lo + +let upper_bound arr x = + let lo = ref 0 and hi = ref (Array.length arr) in + while !lo < !hi do + let mid = (!lo + !hi) / 2 in + if arr.(mid) <= x then lo := mid + 1 + else hi := mid + done; + !lo + +;; + +let a = [| 1; 2; 2; 3; 3; 3; 5; 7; 9 |] in +let cnt3 = upper_bound a 3 - lower_bound a 3 in +let cnt2 = upper_bound a 2 - lower_bound a 2 in +let cnt5 = upper_bound a 5 - lower_bound a 5 in +let cnt9 = upper_bound a 9 - lower_bound a 9 in +let cnt4 = upper_bound a 4 - lower_bound a 4 in +cnt3 * 1000 + cnt2 * 100 + cnt5 * 10 + cnt9 + cnt4 diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index e5b67380..a1dc3edf 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -21,6 +21,7 @@ "bfs_grid.ml": 8, "btree.ml": 39, "brainfuck.ml": 75, + "bs_bounds.ml": 3211, "bsearch.ml": 7, "caesar.ml": 215, "calc.ml": 13, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index eaeecb98..83d4d614 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,15 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-11 Phase 5.1 — bs_bounds.ml baseline (lower_bound / + upper_bound binary search; counts of {3,2,5,9,4} in + [1;2;2;3;3;3;5;7;9] encoded as 3211). Standard half-open + variants: lower_bound returns the first index ≥ x (using `<`), + upper_bound returns the first index > x (using `≤`). Counts: + 3→3, 2→2, 5→1, 9→1, 4→0 → 3*1000+2*100+1*10+1+0 = 3211. Tests + parallel while loops with bisection on ref, mixed strict and + non-strict comparisons, computing counts via subtraction. + 176 baseline programs total. - 2026-05-11 Phase 5.1 — distinct_subseq.ml baseline (count distinct subsequences of "rabbbit" that equal "rabbit" = 3). Classic 2D DP: dp[i][j] = dp[i-1][j] + (s[i-1]=t[j-1] ? dp[i-1][j-1] From f9371e7d222f92522636027beb4693ed18cca494 Mon Sep 17 00:00:00 2001 From: giles Date: Mon, 11 May 2026 01:32:41 +0000 Subject: [PATCH 272/298] ocaml: phase 5.1 lps_dp.ml baseline (LPS "BBABCBCAB" = 7) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Standard O(n^2) length-major DP for longest palindromic subsequence: dp[i][j] = dp[i+1][j-1] + 2 if s[i] = s[j] = max(dp[i+1][j], dp[i][j-1]) otherwise lps "BBABCBCAB" = 7 (witness "BABCBAB" etc.) Complementary to manacher.ml (longest palindromic *substring*, also length 7 on that input by coincidence) — this is the subsequence variant which doesn't require contiguity. Tests length-major fill order, inline if for the length-2 base case, double-nested for with derived j = i + len - 1. 177 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/lps_dp.ml | 21 +++++++++++++++++++++ plans/ocaml-on-sx.md | 9 +++++++++ 3 files changed, 31 insertions(+) create mode 100644 lib/ocaml/baseline/lps_dp.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index a1dc3edf..3fc5f364 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -85,6 +85,7 @@ "floyd_warshall.ml": 9, "lis.ml": 6, "list_ops.ml": 30, + "lps_dp.ml": 7, "lru_cache.ml": 499, "luhn.ml": 2, "magic_square.ml": 65, diff --git a/lib/ocaml/baseline/lps_dp.ml b/lib/ocaml/baseline/lps_dp.ml new file mode 100644 index 00000000..6d7727c4 --- /dev/null +++ b/lib/ocaml/baseline/lps_dp.ml @@ -0,0 +1,21 @@ +let lps s = + let n = String.length s in + let dp = Array.init n (fun _ -> Array.make n 0) in + for i = 0 to n - 1 do dp.(i).(i) <- 1 done; + for len = 2 to n do + for i = 0 to n - len do + let j = i + len - 1 in + if s.[i] = s.[j] then + dp.(i).(j) <- (if len = 2 then 2 else dp.(i + 1).(j - 1) + 2) + else begin + let a = dp.(i + 1).(j) in + let b = dp.(i).(j - 1) in + dp.(i).(j) <- if a > b then a else b + end + done + done; + dp.(0).(n - 1) + +;; + +lps "BBABCBCAB" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 83d4d614..28ae44c5 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,15 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-11 Phase 5.1 — lps_dp.ml baseline (longest palindromic + subsequence in "BBABCBCAB" = 7). Classic 2D O(n²) DP over + substring lengths: dp[i][j] = dp[i+1][j-1] + 2 when s[i]=s[j], + else max(dp[i+1][j], dp[i][j-1]). Witness LPS is "BABCBAB" (or + "BACBCAB" etc.) of length 7. Complementary to manacher.ml + (substring, length 7) — this is the subsequence variant. + Tests length-major fill order, mixed strict/inline if for + `len = 2` base case, double-nested for with derived j. 177 + baseline programs total. - 2026-05-11 Phase 5.1 — bs_bounds.ml baseline (lower_bound / upper_bound binary search; counts of {3,2,5,9,4} in [1;2;2;3;3;3;5;7;9] encoded as 3211). Standard half-open From 609205b551813caaefdf5080f0d7adf64fce1584 Mon Sep 17 00:00:00 2001 From: giles Date: Mon, 11 May 2026 01:42:26 +0000 Subject: [PATCH 273/298] ocaml: phase 5.1 histogram_area.ml baseline (largest rectangle = 10) Linear-time stack algorithm for largest rectangle in histogram: for i = 0 to n do let h = if i = n then 0 else heights.(i) in while top-of-stack's height > h do pop the top, compute its max-width rectangle: width = (no-stack ? i : i - prev_top - 1) area = height * width update best done; if i < n then push i done Sentinel pass at i=n with h=0 flushes the remaining stack. For [2; 1; 5; 6; 2; 3], bars at indices 2 (h=5) and 3 (h=6) form a width-2 rectangle of height 5 = 10. Tests guarded patterns with `when` inside while-cont-flag, nested `match !stack with | [] -> i | t :: _ -> i - t - 1` for width computation. 178 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/histogram_area.ml | 27 +++++++++++++++++++++++++++ plans/ocaml-on-sx.md | 9 +++++++++ 3 files changed, 37 insertions(+) create mode 100644 lib/ocaml/baseline/histogram_area.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 3fc5f364..19c2600c 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -75,6 +75,7 @@ "hamming.ml": 4, "hanoi.ml": 1023, "hist.ml": 75, + "histogram_area.ml": 10, "huffman.ml": 224, "int_sqrt.ml": 1027, "is_prime.ml": 25, diff --git a/lib/ocaml/baseline/histogram_area.ml b/lib/ocaml/baseline/histogram_area.ml new file mode 100644 index 00000000..5edb7d7e --- /dev/null +++ b/lib/ocaml/baseline/histogram_area.ml @@ -0,0 +1,27 @@ +let max_rect heights = + let n = Array.length heights in + let stack = ref [] in + let best = ref 0 in + for i = 0 to n do + let h = if i = n then 0 else heights.(i) in + let cont = ref true in + while !cont do + match !stack with + | top :: rest when heights.(top) > h -> + let height = heights.(top) in + stack := rest; + let width = match !stack with + | [] -> i + | t :: _ -> i - t - 1 + in + let area = height * width in + if area > !best then best := area + | _ -> cont := false + done; + if i < n then stack := i :: !stack + done; + !best + +;; + +max_rect [| 2; 1; 5; 6; 2; 3 |] diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 28ae44c5..99f48089 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,15 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-11 Phase 5.1 — histogram_area.ml baseline (largest + rectangle in histogram [2;1;5;6;2;3] = 10). Linear-time stack + algorithm: push indices while heights are non-decreasing; on + drop, pop and compute the bar's max-width rectangle. Sentinel + iteration at i=n with h=0 flushes remaining stack. The bars at + indices 2 and 3 (heights 5 and 6) form a width-2 rectangle of + height 5 = 10. Tests `match … with | top :: rest when h > … -> … + | _ -> …` guarded patterns inside a while-cont-flag loop, nested + match on the same stack ref. 178 baseline programs total. - 2026-05-11 Phase 5.1 — lps_dp.ml baseline (longest palindromic subsequence in "BBABCBCAB" = 7). Classic 2D O(n²) DP over substring lengths: dp[i][j] = dp[i+1][j-1] + 2 when s[i]=s[j], From 8a80bd3923a9a1719b63f4e2265a55b88db138ad Mon Sep 17 00:00:00 2001 From: giles Date: Mon, 11 May 2026 01:53:21 +0000 Subject: [PATCH 274/298] ocaml: phase 5.1 dp_word_break.ml baseline (4/5 strings segmentable) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Classic word-break DP — for each position i, check whether any dictionary word ends at i with a prior reachable position: dp[i] = exists w in dict with wl <= i and dp[i - wl] && s.sub (i - wl) wl = w Dictionary: apple, pen, pine, pineapple, cats, cat, and, sand, dog Inputs: applepenapple yes (apple pen apple) pineapplepenapple yes (pineapple pen apple) catsanddog yes (cats and dog) catsandog no (no segmentation reaches the end) applesand yes (apple sand) Tests bool-typed Array, String.sub primitive, nested List.iter over the dict inside for-loop over end positions, closure capture of the outer dp. 179 baseline programs total. --- lib/ocaml/baseline/dp_word_break.ml | 27 +++++++++++++++++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 8 ++++++++ 3 files changed, 36 insertions(+) create mode 100644 lib/ocaml/baseline/dp_word_break.ml diff --git a/lib/ocaml/baseline/dp_word_break.ml b/lib/ocaml/baseline/dp_word_break.ml new file mode 100644 index 00000000..93990b01 --- /dev/null +++ b/lib/ocaml/baseline/dp_word_break.ml @@ -0,0 +1,27 @@ +let word_break s words = + let n = String.length s in + let dp = Array.make (n + 1) false in + dp.(0) <- true; + for i = 1 to n do + List.iter (fun w -> + let wl = String.length w in + if i >= wl && dp.(i - wl) then begin + let prefix = String.sub s (i - wl) wl in + if prefix = w then dp.(i) <- true + end + ) words + done; + if dp.(n) then 1 else 0 + +let count_ok strings words = + let count = ref 0 in + List.iter (fun s -> + count := !count + word_break s words + ) strings; + !count + +;; + +let dict = ["apple"; "pen"; "pine"; "pineapple"; "cats"; "cat"; "and"; "sand"; "dog"] in +let inputs = ["applepenapple"; "pineapplepenapple"; "catsanddog"; "catsandog"; "applesand"] in +count_ok inputs dict diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 19c2600c..bda63b8c 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -35,6 +35,7 @@ "csv.ml": 10, "egg_drop.ml": 8, "dijkstra.ml": 7, + "dp_word_break.ml": 4, "distinct_subseq.ml": 3, "exception_handle.ml": 4, "exception_user.ml": 26, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 99f48089..1fe9fa28 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,14 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-11 Phase 5.1 — dp_word_break.ml baseline (word-break DP + over 5 strings with 9-word dictionary; 4 strings segmentable). + dp[i] = ∃ word w of length wl ≤ i with prefix s[i−wl..i]=w and + dp[i−wl]=true. Inputs: applepenapple, pineapplepenapple, + catsanddog (yes); catsandog (no — leftover "og"); applesand (yes). + Tests bool-typed DP array, `String.sub s start len` substring + primitive, nested List.iter over dict inside for-loop over + positions, short-circuit + closure. 179 baseline programs total. - 2026-05-11 Phase 5.1 — histogram_area.ml baseline (largest rectangle in histogram [2;1;5;6;2;3] = 10). Linear-time stack algorithm: push indices while heights are non-decreasing; on From 6d7df11224cd95a808e61330ecce953545e11f61 Mon Sep 17 00:00:00 2001 From: giles Date: Mon, 11 May 2026 02:04:08 +0000 Subject: [PATCH 275/298] ocaml: phase 5.1 island_count.ml baseline (6x7 grid, 5 components) Count 4-connected components of 1-cells via DFS flood from every unvisited 1-cell. Grid (1s shown as #): # # . . . # # # . . . # # . . . # # . . . . # # # . # # # . . . . # . # # . # # # . Components: {(0,0),(0,1),(1,0)} {(0,5),(0,6),(1,4),(1,5)} {(2,2),(2,3),(3,1),(3,2),(3,3)} {(3,5),(3,6),(4,5),(5,3),(5,4),(5,5)} {(4,0),(5,0),(5,1)} -> 5 islands Complementary to flood_fill.ml (largest component); this counts total components. Tests recursive function returning () at early-exit branches, ordered double-for entry pass triggering one fill per island root. 180 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/island_count.ml | 37 ++++++++++++++++++++++++++++++ plans/ocaml-on-sx.md | 12 ++++++++++ 3 files changed, 50 insertions(+) create mode 100644 lib/ocaml/baseline/island_count.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index bda63b8c..8561668a 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -80,6 +80,7 @@ "huffman.ml": 224, "int_sqrt.ml": 1027, "is_prime.ml": 25, + "island_count.ml": 5, "fizz_classifier.ml": 540, "fizzbuzz.ml": 57, "flatten_tree.ml": 28, diff --git a/lib/ocaml/baseline/island_count.ml b/lib/ocaml/baseline/island_count.ml new file mode 100644 index 00000000..f2ee3aa9 --- /dev/null +++ b/lib/ocaml/baseline/island_count.ml @@ -0,0 +1,37 @@ +let h = 6 +let w = 7 +let grid = [| + [| 1; 1; 0; 0; 0; 1; 1 |]; + [| 1; 0; 0; 0; 1; 1; 0 |]; + [| 0; 0; 1; 1; 0; 0; 0 |]; + [| 0; 1; 1; 1; 0; 1; 1 |]; + [| 1; 0; 0; 0; 0; 1; 0 |]; + [| 1; 1; 0; 1; 1; 1; 0 |] +|] + +let rec fill visited r c = + if r < 0 || r >= h || c < 0 || c >= w then () + else if visited.(r).(c) || grid.(r).(c) = 0 then () + else begin + visited.(r).(c) <- true; + fill visited (r - 1) c; + fill visited (r + 1) c; + fill visited r (c - 1); + fill visited r (c + 1) + end + +let count_islands () = + let visited = Array.init h (fun _ -> Array.make w false) in + let count = ref 0 in + for r = 0 to h - 1 do + for c = 0 to w - 1 do + if grid.(r).(c) = 1 && not visited.(r).(c) then begin + count := !count + 1; + fill visited r c + end + done + done; + !count +;; + +count_islands () diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 1fe9fa28..f21989ca 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,18 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-11 Phase 5.1 — island_count.ml baseline (count 4-connected + components of 1-cells in a 6×7 grid = 5). DFS flood from every + unvisited 1-cell. Counted islands: + {(0,0),(0,1),(1,0)} + {(0,5),(0,6),(1,4),(1,5)} + {(2,2),(2,3),(3,1),(3,2),(3,3)} + {(3,5),(3,6),(4,5),(5,3),(5,4),(5,5)} + {(4,0),(5,0),(5,1)} + Complementary to flood_fill.ml which measures largest component; + this counts them. Tests recursive returning `()` (unit) at + early-exit branches, ordered double-for entry pass that triggers + one fill per island root. 180 baseline programs total. - 2026-05-11 Phase 5.1 — dp_word_break.ml baseline (word-break DP over 5 strings with 9-word dictionary; 4 strings segmentable). dp[i] = ∃ word w of length wl ≤ i with prefix s[i−wl..i]=w and From 2726ed9b8ab99118872d58524a40ca87757b4705 Mon Sep 17 00:00:00 2001 From: giles Date: Mon, 11 May 2026 02:14:44 +0000 Subject: [PATCH 276/298] ocaml: phase 5.1 palindrome_part.ml baseline (min cuts "aabba" = 1) Two-phase palindrome-partition DP for the minimum-cuts variant: Phase 1: is_pal[i][j] palindrome table via length-major fill (single chars, then pairs, then expand inward). Phase 2: cuts[i] = 0 if s[0..i] is itself a palindrome, = min over j of (cuts[j-1] + 1) where s[j..i] is a palindrome. min_cut "aabba" = 1 ("a" | "abba") Tests two sequential 2D DPs sharing the same is_pal matrix, inline begin/end branches inside the length-major fill, mixed bool and int 2D arrays. 181 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/palindrome_part.ml | 35 +++++++++++++++++++++++++++ plans/ocaml-on-sx.md | 10 ++++++++ 3 files changed, 46 insertions(+) create mode 100644 lib/ocaml/baseline/palindrome_part.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 8561668a..35371c95 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -125,6 +125,7 @@ "mutable_record.ml": 10, "option_match.ml": 5, "palindrome.ml": 4, + "palindrome_part.ml": 1, "palindrome_sum.ml": 49500, "paren_depth.ml": 7, "partition.ml": 3025, diff --git a/lib/ocaml/baseline/palindrome_part.ml b/lib/ocaml/baseline/palindrome_part.ml new file mode 100644 index 00000000..0f4b7fb8 --- /dev/null +++ b/lib/ocaml/baseline/palindrome_part.ml @@ -0,0 +1,35 @@ +let min_cut s = + let n = String.length s in + if n <= 1 then 0 + else begin + let is_pal = Array.init n (fun _ -> Array.make n false) in + for i = 0 to n - 1 do is_pal.(i).(i) <- true done; + for len = 2 to n do + for i = 0 to n - len do + let j = i + len - 1 in + if s.[i] = s.[j] then begin + if len = 2 then is_pal.(i).(j) <- true + else is_pal.(i).(j) <- is_pal.(i + 1).(j - 1) + end + done + done; + let cuts = Array.make n 0 in + for i = 0 to n - 1 do + if is_pal.(0).(i) then cuts.(i) <- 0 + else begin + let best = ref i in + for j = 1 to i do + if is_pal.(j).(i) then begin + let c = cuts.(j - 1) + 1 in + if c < !best then best := c + end + done; + cuts.(i) <- !best + end + done; + cuts.(n - 1) + end + +;; + +min_cut "aabba" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index f21989ca..6cc60e48 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,16 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-11 Phase 5.1 — palindrome_part.ml baseline (minimum + palindrome-partition cuts in "aabba" = 1). Two-phase DP: + 1) `is_pal.(i).(j)` table via length-major iteration. + 2) `cuts.(i)` = min cuts for prefix s[0..i]; if s[0..i] itself + is a palindrome, 0; else min over j of (cuts.(j-1) + 1) + where s[j..i] is a palindrome. + For "aabba" the optimal partition is "a" | "abba" = 1 cut. + Tests sequential 2D DP passes sharing the same `is_pal` matrix, + inline `if/else begin/end` blocks under length-major fill, mixed + bool and int 2D arrays. 181 baseline programs total. - 2026-05-11 Phase 5.1 — island_count.ml baseline (count 4-connected components of 1-cells in a 6×7 grid = 5). DFS flood from every unvisited 1-cell. Counted islands: From 047ea62d430cafdf010b64f84be47ca77968607c Mon Sep 17 00:00:00 2001 From: giles Date: Mon, 11 May 2026 02:25:04 +0000 Subject: [PATCH 277/298] ocaml: phase 5.1 regex_simple.ml baseline (./* matcher, 7/28 match) Recursive regex matcher with Leetcode-style semantics: . matches any single character * matches zero or more of let rec is_match s i p j = if j = String.length p then i = String.length s else let first = i < String.length s && (p.[j] = '.' || p.[j] = s.[i]) in if j + 1 < String.length p && p.[j+1] = '*' then is_match s i p (j + 2) (* skip * group *) || (first && is_match s (i + 1) p j) (* consume one *) else first && is_match s (i + 1) p (j + 1) Patterns vs texts: .a.b | aabb axb "" abcd abc aaabbbc x -> 1 match a.*b | aabb axb "" abcd abc aaabbbc x -> 2 matches x* | aabb axb "" abcd abc aaabbbc x -> 2 matches a*b*c | aabb axb "" abcd abc aaabbbc x -> 2 matches total = 7 Complements wildcard_match.ml which uses LIKE-style * / ?. 182 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/regex_simple.ml | 27 +++++++++++++++++++++++++++ plans/ocaml-on-sx.md | 12 ++++++++++++ 3 files changed, 40 insertions(+) create mode 100644 lib/ocaml/baseline/regex_simple.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 35371c95..29e29d25 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -149,6 +149,7 @@ "radix_sort.ml": 802002, "roman.ml": 44, "rolling_hash.ml": 6, + "regex_simple.ml": 7, "reverse_int.ml": 54329, "rpn.ml": 9, "run_decode.ml": 21, diff --git a/lib/ocaml/baseline/regex_simple.ml b/lib/ocaml/baseline/regex_simple.ml new file mode 100644 index 00000000..bcc692d6 --- /dev/null +++ b/lib/ocaml/baseline/regex_simple.ml @@ -0,0 +1,27 @@ +let rec is_match s i p j = + if j = String.length p then i = String.length s + else + let first = + i < String.length s + && (p.[j] = '.' || p.[j] = s.[i]) + in + if j + 1 < String.length p && p.[j + 1] = '*' then + is_match s i p (j + 2) + || (first && is_match s (i + 1) p j) + else + first && is_match s (i + 1) p (j + 1) + +let count_match pats texts = + let count = ref 0 in + List.iter (fun p -> + List.iter (fun t -> + if is_match t 0 p 0 then count := !count + 1 + ) texts + ) pats; + !count + +;; + +let pats = [".a.b"; "a.*b"; "x*"; "a*b*c"] in +let texts = ["aabb"; "axb"; ""; "abcd"; "abc"; "aaabbbc"; "x"] in +count_match pats texts diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 6cc60e48..41d81acd 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,18 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-11 Phase 5.1 — regex_simple.ml baseline (recursive `.`/`*` + regex matcher; over 4×7 = 28 (pattern, text) combos, 7 match). + Cases per pattern: + .a.b — 1 match (aabb) + a.*b — 2 matches (aabb, axb) + x* — 2 matches ("" and "x") + a*b*c — 2 matches (abc, aaabbbc) + Two-position lookahead for `p.[j+1] = '*'` to decide between + zero-match and consume-one-char-and-retry. Complements + wildcard_match.ml (LIKE-style `*` / `?` semantics); this one + has Leetcode-style `.` (any char) and `*` (zero+ of c). + 182 baseline programs total. - 2026-05-11 Phase 5.1 — palindrome_part.ml baseline (minimum palindrome-partition cuts in "aabba" = 1). Two-phase DP: 1) `is_pal.(i).(j)` table via length-major iteration. From 2c7a1bfc47c04333f532b90687294e18cb248c21 Mon Sep 17 00:00:00 2001 From: giles Date: Mon, 11 May 2026 02:34:58 +0000 Subject: [PATCH 278/298] ocaml: phase 5.1 permutations_gen.ml baseline (24 perms, 12 with a [[]] | _ -> List.fold_left (fun acc x -> let rest = List.filter (fun y -> y <> x) xs in let subs = permutations rest in acc @ List.map (fun p -> x :: p) subs ) [] xs For permutations of [1; 2; 3; 4] (24 total), count those whose first element is less than the last: match p with | [a; _; _; b] when a < b -> count := !count + 1 | _ -> () By symmetry, exactly half satisfy a < b = 12. Tests List.filter, recursive fold with append, fixed-length list pattern [a; _; _; b] with multiple wildcards + when guard. 183 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/permutations_gen.ml | 20 ++++++++++++++++++++ plans/ocaml-on-sx.md | 10 ++++++++++ 3 files changed, 31 insertions(+) create mode 100644 lib/ocaml/baseline/permutations_gen.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 29e29d25..17bbf814 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -134,6 +134,7 @@ "pascal.ml": 252, "peano.ml": 30, "perfect.ml": 3, + "permutations_gen.ml": 12, "pi_leibniz.ml": 314, "prefix_sum.ml": 66, "pretty_table.ml": 64, diff --git a/lib/ocaml/baseline/permutations_gen.ml b/lib/ocaml/baseline/permutations_gen.ml new file mode 100644 index 00000000..65b83e8a --- /dev/null +++ b/lib/ocaml/baseline/permutations_gen.ml @@ -0,0 +1,20 @@ +let rec permutations xs = + match xs with + | [] -> [[]] + | _ -> + List.fold_left (fun acc x -> + let rest = List.filter (fun y -> y <> x) xs in + let subs = permutations rest in + acc @ List.map (fun p -> x :: p) subs + ) [] xs + +;; + +let ps = permutations [1; 2; 3; 4] in +let count = ref 0 in +List.iter (fun p -> + match p with + | [a; _; _; b] when a < b -> count := !count + 1 + | _ -> () +) ps; +!count diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 41d81acd..5eaec083 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,16 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-11 Phase 5.1 — permutations_gen.ml baseline (enumerate all + 24 permutations of [1;2;3;4], count those with first Date: Mon, 11 May 2026 02:44:40 +0000 Subject: [PATCH 279/298] ocaml: phase 5.1 kth_two.ml baseline (8th smallest of two sorted = 8) Two-pointer merge advancing the smaller-head pointer k times, without materializing the merged array: while !count < k do let pick_a = if !i = m then false (* a exhausted, take from b *) else if !j = n then true (* b exhausted, take from a *) else a.(!i) <= b.(!j) in if pick_a then ... else ...; count := !count + 1 done For a = [1;3;5;7;9;11;13], b = [2;4;6;8;10;12]: merged order: 1,2,3,4,5,6,7,8,9,10,11,12,13 8th element = 8. Tests nested if/else if/else flowing into a bool, dual-ref two-pointer loop, separate count counter for k-th constraint. 184 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/kth_two.ml | 27 +++++++++++++++++++++++++++ plans/ocaml-on-sx.md | 10 ++++++++++ 3 files changed, 38 insertions(+) create mode 100644 lib/ocaml/baseline/kth_two.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 17bbf814..c2530159 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -102,6 +102,7 @@ "json_pretty.ml": 24, "kadane.ml": 6, "kmp.ml": 5, + "kth_two.ml": 8, "knapsack.ml": 36, "lambda_calc.ml": 7, "lcs.ml": 4, diff --git a/lib/ocaml/baseline/kth_two.ml b/lib/ocaml/baseline/kth_two.ml new file mode 100644 index 00000000..1395f5e5 --- /dev/null +++ b/lib/ocaml/baseline/kth_two.ml @@ -0,0 +1,27 @@ +let kth_two a b k = + let m = Array.length a in + let n = Array.length b in + let i = ref 0 and j = ref 0 and count = ref 0 in + let result = ref 0 in + while !count < k do + let pick_a = + if !i = m then false + else if !j = n then true + else a.(!i) <= b.(!j) + in + if pick_a then begin + result := a.(!i); + i := !i + 1 + end else begin + result := b.(!j); + j := !j + 1 + end; + count := !count + 1 + done; + !result + +;; + +let a = [| 1; 3; 5; 7; 9; 11; 13 |] in +let b = [| 2; 4; 6; 8; 10; 12 |] in +kth_two a b 8 diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 5eaec083..fe1ccc6b 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,16 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-11 Phase 5.1 — kth_two.ml baseline (8th smallest in + merged [1;3;5;7;9;11;13] ∪ [2;4;6;8;10;12] = 8). Two-pointer + merge that advances the smaller-head pointer k times. Pick rule: + pick_a = (j past end) ? true + : (i past end) ? false + : a[i] <= b[j] + Combined order is 1,2,3,4,5,6,7,8,…; 8th element = 8. Tests + nested `if/else if/else` flowing into a bool, dual-ref two- + pointer loop, separate count counter for the k-th constraint. + 184 baseline programs total. - 2026-05-11 Phase 5.1 — permutations_gen.ml baseline (enumerate all 24 permutations of [1;2;3;4], count those with first Date: Mon, 11 May 2026 02:54:50 +0000 Subject: [PATCH 280/298] ocaml: phase 5.1 floyd_cycle.ml baseline (tortoise-hare, mu=0 lam=8 -> 8) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Floyd's cycle detection on a numeric function f(x) = (2x + 5) mod 17. Three phases: Phase 1: advance slow/fast until collision inside the cycle (fast double-steps, slow single-steps) Phase 2: restart slow from x0; advance both by 1 until they meet — count is mu (length of tail before cycle) Phase 3: advance fast around cycle once until it meets slow — count is lam (cycle length) For x0 = 1, the orbit visits 1, 7, 2, 9, 6, 0, 5, 15 then returns to 1 — pure cycle of length 8, mu = 0, lam = 8. Encoded as mu*100 + lam = 8. Tests three sequential while loops sharing ref state, double-step `fast := f (f !fast)`, meeting-condition flag. 185 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/floyd_cycle.ml | 29 +++++++++++++++++++++++++++++ plans/ocaml-on-sx.md | 11 +++++++++++ 3 files changed, 41 insertions(+) create mode 100644 lib/ocaml/baseline/floyd_cycle.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index c2530159..7da9235c 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -85,6 +85,7 @@ "fizzbuzz.ml": 57, "flatten_tree.ml": 28, "flood_fill.ml": 7, + "floyd_cycle.ml": 8, "floyd_warshall.ml": 9, "lis.ml": 6, "list_ops.ml": 30, diff --git a/lib/ocaml/baseline/floyd_cycle.ml b/lib/ocaml/baseline/floyd_cycle.ml new file mode 100644 index 00000000..60ecec9f --- /dev/null +++ b/lib/ocaml/baseline/floyd_cycle.ml @@ -0,0 +1,29 @@ +let f x = (x * 2 + 5) mod 17 + +let floyd_cycle x0 = + let slow = ref x0 in + let fast = ref x0 in + let meet = ref false in + while not !meet do + slow := f !slow; + fast := f (f !fast); + if !slow = !fast then meet := true + done; + slow := x0; + let mu = ref 0 in + while !slow <> !fast do + slow := f !slow; + fast := f !fast; + mu := !mu + 1 + done; + let lam = ref 1 in + fast := f !slow; + while !slow <> !fast do + fast := f !fast; + lam := !lam + 1 + done; + !mu * 100 + !lam + +;; + +floyd_cycle 1 diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index fe1ccc6b..3548ce41 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,17 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-11 Phase 5.1 — floyd_cycle.ml baseline (Floyd's tortoise + -and-hare cycle detection on f(x) = (2x+5) mod 17; μ=0 λ=8 → + encoded 8). Three phases: (1) advance slow/fast until they + collide inside the cycle, (2) restart slow from x0 and advance + both by 1 until they meet — the count is μ (tail length), + (3) advance fast around the cycle once until it meets slow — the + count is λ (cycle length). For x0=1, f(1)=7, f(7)=2, … cycles + through all 8 distinct values 1,7,2,9,6,0,5,15 before returning + to 1, so μ=0 λ=8 → 0*100+8=8. Tests three sequential while + loops sharing ref state, double-step `fast := f (f !fast)`, + meeting-condition flag pattern. 185 baseline programs total. - 2026-05-11 Phase 5.1 — kth_two.ml baseline (8th smallest in merged [1;3;5;7;9;11;13] ∪ [2;4;6;8;10;12] = 8). Two-pointer merge that advances the smaller-head pointer k times. Pick rule: From aaa602003710df28ebba6a27b7050d775155acc5 Mon Sep 17 00:00:00 2001 From: giles Date: Mon, 11 May 2026 03:04:14 +0000 Subject: [PATCH 281/298] ocaml: phase 5.1 count_subarrays_k.ml baseline (sum-k subarrays = 7) Count contiguous subarrays summing to a target k using a prefix sum table: prefix[i+1] = prefix[i] + arr[i] count = |{ (i, j) : 0 <= i < j <= n, prefix[j] - prefix[i] = k }| For arr = [1; 1; 1; 2; -1; 3; 1; -2; 4] and k = 3, the seven witnesses are: [1, 1, 1] (0..2) [1, 1, 2, -1] (1..4) [1, 2] (2..3) [2, -1, 3, 1, -2] (3..7) [-1, 3, 1] (4..6) [3] (5..5) [1, -2, 4] (6..8) Negative-valued arrays exercise mixed-sign integer arithmetic. 186 baseline programs total. --- lib/ocaml/baseline/count_subarrays_k.ml | 16 ++++++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 8 ++++++++ 3 files changed, 25 insertions(+) create mode 100644 lib/ocaml/baseline/count_subarrays_k.ml diff --git a/lib/ocaml/baseline/count_subarrays_k.ml b/lib/ocaml/baseline/count_subarrays_k.ml new file mode 100644 index 00000000..ea6b552a --- /dev/null +++ b/lib/ocaml/baseline/count_subarrays_k.ml @@ -0,0 +1,16 @@ +let count_subarr_sum_k arr k = + let n = Array.length arr in + let prefix = Array.make (n + 1) 0 in + for i = 0 to n - 1 do + prefix.(i + 1) <- prefix.(i) + arr.(i) + done; + let count = ref 0 in + for i = 0 to n - 1 do + for j = i + 1 to n do + if prefix.(j) - prefix.(i) = k then count := !count + 1 + done + done; + !count +;; + +count_subarr_sum_k [| 1; 1; 1; 2; -1; 3; 1; -2; 4 |] 3 diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 7da9235c..76cc379e 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -32,6 +32,7 @@ "coin_min.ml": 6, "count_change.ml": 406, "count_inversions.ml": 12, + "count_subarrays_k.ml": 7, "csv.ml": 10, "egg_drop.ml": 8, "dijkstra.ml": 7, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 3548ce41..4ead3913 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,14 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-11 Phase 5.1 — count_subarrays_k.ml baseline (count + contiguous subarrays of [1;1;1;2;-1;3;1;-2;4] summing to k=3 + = 7). Prefix-sum brute force in O(n²): build cumulative array, + enumerate (i,j) pairs and check prefix[j]−prefix[i]=k. The 7 + witnesses include [1,1,1], [1,1,2,−1], [1,2], [2,−1,3,1,−2], + [−1,3,1], [3], [1,−2,4]. Negative-valued arrays exercise our + evaluator's mixed-sign integer arithmetic (no underflow at the + JS safe-int boundary). 186 baseline programs total. - 2026-05-11 Phase 5.1 — floyd_cycle.ml baseline (Floyd's tortoise -and-hare cycle detection on f(x) = (2x+5) mod 17; μ=0 λ=8 → encoded 8). Three phases: (1) advance slow/fast until they From d3340107e6c34bec64f4e71e221df5c889b66389 Mon Sep 17 00:00:00 2001 From: giles Date: Mon, 11 May 2026 03:14:23 +0000 Subject: [PATCH 282/298] ocaml: phase 5.1 count_palindromes.ml baseline ("aabaa" -> 9 palindromes) Expand-around-center linear-time palindrome counting: for c = 0 to 2*n - 2 do let l = ref (c / 2) in let r = ref ((c + 1) / 2) in while !l >= 0 && !r < n && s.[!l] = s.[!r] do count := !count + 1; l := !l - 1; r := !r + 1 done done The 2n-1 centers cover both odd (c even -> l = r) and even (c odd -> l = r - 1) palindromes. For "aabaa": 5 singletons + 2 "aa" + 1 "aba" + 1 "aabaa" = 9 Complements lps_dp.ml (longest subsequence) and manacher.ml (longest substring); this one *counts* all palindromic substrings. 187 baseline programs total. --- lib/ocaml/baseline/count_palindromes.ml | 17 +++++++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 9 +++++++++ 3 files changed, 27 insertions(+) create mode 100644 lib/ocaml/baseline/count_palindromes.ml diff --git a/lib/ocaml/baseline/count_palindromes.ml b/lib/ocaml/baseline/count_palindromes.ml new file mode 100644 index 00000000..ac02792c --- /dev/null +++ b/lib/ocaml/baseline/count_palindromes.ml @@ -0,0 +1,17 @@ +let count_pal s = + let n = String.length s in + let count = ref 0 in + for c = 0 to 2 * n - 2 do + let l = ref (c / 2) in + let r = ref ((c + 1) / 2) in + while !l >= 0 && !r < n && s.[!l] = s.[!r] do + count := !count + 1; + l := !l - 1; + r := !r + 1 + done + done; + !count + +;; + +count_pal "aabaa" diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 76cc379e..81064fe6 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -32,6 +32,7 @@ "coin_min.ml": 6, "count_change.ml": 406, "count_inversions.ml": 12, + "count_palindromes.ml": 9, "count_subarrays_k.ml": 7, "csv.ml": 10, "egg_drop.ml": 8, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 4ead3913..d2902e4d 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,15 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-11 Phase 5.1 — count_palindromes.ml baseline (count + palindromic substrings of "aabaa" = 9). Expand-around-center + with 2n−1 centers (n odd-length, n−1 even-length); for each, + walk outward while characters match and both indices in range. + Decomposition: c/2 and (c+1)/2 gives both odd (c even) and even + (c odd) starting pairs. Palindromes in "aabaa": 5 singletons + + 2 "aa"s + 1 "aba" + 1 "aabaa" = 9. Complements lps_dp.ml + (longest subsequence) and manacher.ml (longest substring); this + one counts ALL palindromic substrings. 187 baseline programs total. - 2026-05-11 Phase 5.1 — count_subarrays_k.ml baseline (count contiguous subarrays of [1;1;1;2;-1;3;1;-2;4] summing to k=3 = 7). Prefix-sum brute force in O(n²): build cumulative array, From 6d89da9380b91f792ff93aae2fd678c18d80c1dc Mon Sep 17 00:00:00 2001 From: giles Date: Mon, 11 May 2026 03:24:45 +0000 Subject: [PATCH 283/298] ocaml: phase 5.1 interval_overlap.ml baseline (7 intervals, 6 overlapping pairs) For each (i, j) pair with i < j, test whether intervals (s1, e1) and (s2, e2) overlap via the standard `s1 <= e2 && s2 <= e1`. intervals: (1,4) (2,5) (7,9) (3,6) (8,10) (11,12) (0,2) overlapping pairs: (1,4) & (2,5) (1,4) & (3,6) (1,4) & (0,2) (2,5) & (3,6) (2,5) & (0,2) (7,9) & (8,10) = 6 Tests double for-loop with both-side tuple destructure via `let (s1, e1) = arr.(i) in`, conjunctive comparison, list-to- array conversion. 188 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/interval_overlap.ml | 16 ++++++++++++++++ plans/ocaml-on-sx.md | 9 +++++++++ 3 files changed, 26 insertions(+) create mode 100644 lib/ocaml/baseline/interval_overlap.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 81064fe6..16164f52 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -81,6 +81,7 @@ "histogram_area.ml": 10, "huffman.ml": 224, "int_sqrt.ml": 1027, + "interval_overlap.ml": 6, "is_prime.ml": 25, "island_count.ml": 5, "fizz_classifier.ml": 540, diff --git a/lib/ocaml/baseline/interval_overlap.ml b/lib/ocaml/baseline/interval_overlap.ml new file mode 100644 index 00000000..4dabf426 --- /dev/null +++ b/lib/ocaml/baseline/interval_overlap.ml @@ -0,0 +1,16 @@ +let count_overlaps intervals = + let arr = Array.of_list intervals in + let n = Array.length arr in + let count = ref 0 in + for i = 0 to n - 1 do + let (s1, e1) = arr.(i) in + for j = i + 1 to n - 1 do + let (s2, e2) = arr.(j) in + if s1 <= e2 && s2 <= e1 then count := !count + 1 + done + done; + !count + +;; + +count_overlaps [(1, 4); (2, 5); (7, 9); (3, 6); (8, 10); (11, 12); (0, 2)] diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index d2902e4d..8a57b93c 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,15 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-11 Phase 5.1 — interval_overlap.ml baseline (count + overlapping interval pairs in 7-interval set = 6). For each pair + (i, j) test `s1 ≤ e2 && s2 ≤ e1`. Intervals: (1,4) (2,5) (7,9) + (3,6) (8,10) (11,12) (0,2). Overlapping pairs: + (1,4)&(2,5), (1,4)&(3,6), (1,4)&(0,2), + (2,5)&(3,6), (2,5)&(0,2), (7,9)&(8,10) + = 6 pairs. Tests double for-loop with both-side tuple destructure + via `let (s1, e1) = arr.(i) in`, conjunctive comparison, list-to- + array conversion. 188 baseline programs total. - 2026-05-11 Phase 5.1 — count_palindromes.ml baseline (count palindromic substrings of "aabaa" = 9). Expand-around-center with 2n−1 centers (n odd-length, n−1 even-length); for each, From 73efd229be898eac2f2d5a105b8a8582ca2c18a5 Mon Sep 17 00:00:00 2001 From: giles Date: Mon, 11 May 2026 03:34:48 +0000 Subject: [PATCH 284/298] ocaml: phase 5.1 house_robber.ml baseline (max non-adjacent sum = 22) Classic House Robber linear DP: dp[i] = max(dp[i-2] + houses[i], dp[i-1]) For [2; 7; 9; 3; 1; 5; 8; 6]: dp = [2, 7, 11, 11, 12, 16, 20, 22] max sum = 22 Optimal pick is {indices 0, 2, 5, 7}: 2 + 9 + 5 + 6 = 22 (all non-adjacent). Tests linear DP with early-return edge cases for n=0 and n=1, inline-if rhs for max-of-two, dual look-back into dp[i-2] and dp[i-1]. 189 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/house_robber.ml | 19 +++++++++++++++++++ plans/ocaml-on-sx.md | 11 +++++++++++ 3 files changed, 31 insertions(+) create mode 100644 lib/ocaml/baseline/house_robber.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 16164f52..c9a73bf7 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -78,6 +78,7 @@ "hamming.ml": 4, "hanoi.ml": 1023, "hist.ml": 75, + "house_robber.ml": 22, "histogram_area.ml": 10, "huffman.ml": 224, "int_sqrt.ml": 1027, diff --git a/lib/ocaml/baseline/house_robber.ml b/lib/ocaml/baseline/house_robber.ml new file mode 100644 index 00000000..ab2af0ad --- /dev/null +++ b/lib/ocaml/baseline/house_robber.ml @@ -0,0 +1,19 @@ +let rob houses = + let n = Array.length houses in + if n = 0 then 0 + else if n = 1 then houses.(0) + else begin + let dp = Array.make n 0 in + dp.(0) <- houses.(0); + dp.(1) <- if houses.(0) > houses.(1) then houses.(0) else houses.(1); + for i = 2 to n - 1 do + let take = dp.(i - 2) + houses.(i) in + let skip = dp.(i - 1) in + dp.(i) <- if take > skip then take else skip + done; + dp.(n - 1) + end + +;; + +rob [| 2; 7; 9; 3; 1; 5; 8; 6 |] diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 8a57b93c..caf5e05f 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,17 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-11 Phase 5.1 — house_robber.ml baseline (linear-DP + max non-adjacent-sum on [2;7;9;3;1;5;8;6] = 22). dp[i] = + max(dp[i-2]+houses[i], dp[i-1]). Optimal pick {7, 9, 5, 8} but + that's adjacent (5 next to 8). Re-check: {7, 9, 1, 8} sum 25? + Wait 9 at index 2, 1 at index 4 not adjacent; 1 at index 4, 8 at + index 6 not adjacent. But 7,9 are at indices 1,2 — adjacent! + Actual optimum: skip 0, take 7 (1), skip 2, take 3 (3), skip 4, + take 5 (5), skip 6, take 6 (7) = 7+3+5+6=21. Or take 2,9,1,8 = + 20. Or 2,9,5,6 = 22? indices 0,2,5,7 (non-adjacent). 2+9+5+6=22. + ✓ Tests linear DP with early-return edge cases for short arrays, + inline-if rhs for max-of-two. 189 baseline programs total. - 2026-05-11 Phase 5.1 — interval_overlap.ml baseline (count overlapping interval pairs in 7-interval set = 6). For each pair (i, j) test `s1 ≤ e2 && s2 ≤ e1`. Intervals: (1,4) (2,5) (7,9) From 97a29c6baca1beadd29e5e79f36381440b126952 Mon Sep 17 00:00:00 2001 From: giles Date: Mon, 11 May 2026 03:44:40 +0000 Subject: [PATCH 285/298] ocaml: phase 5.1 stock_two.ml baseline (best of 2 transactions = 6) Two-pass partition DP for max profit with at most 2 transactions: left[i] = max single-trans profit in prices[0..i] (forward scan tracking running min) right[i] = max single-trans profit in prices[i..n-1] (backward scan tracking running max) answer = max over i of (left[i] + right[i]) For [3; 3; 5; 0; 0; 3; 1; 4]: optimal partition i = 2: left[2] = sell@5 after buy@3 = 2 right[2] = sell@4 after buy@0 in [2..7] = 4 total = 6 Tests parallel forward + backward passes on parallel DP arrays, mixed ref + array state, for downto + for ascending scans on the same data. 190 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/stock_two.ml | 29 +++++++++++++++++++++++++++++ plans/ocaml-on-sx.md | 10 ++++++++++ 3 files changed, 40 insertions(+) create mode 100644 lib/ocaml/baseline/stock_two.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index c9a73bf7..36865605 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -166,6 +166,7 @@ "shuffle.ml": 55, "simpson_int.ml": 10000, "stable_unique.ml": 46, + "stock_two.ml": 6, "subseq_check.ml": 3, "tail_factorial.ml": 479001600, "tarjan_scc.ml": 4, diff --git a/lib/ocaml/baseline/stock_two.ml b/lib/ocaml/baseline/stock_two.ml new file mode 100644 index 00000000..063710fe --- /dev/null +++ b/lib/ocaml/baseline/stock_two.ml @@ -0,0 +1,29 @@ +let max_profit_two prices = + let n = Array.length prices in + if n < 2 then 0 + else begin + let left = Array.make n 0 in + let min_p = ref prices.(0) in + for i = 1 to n - 1 do + if prices.(i) < !min_p then min_p := prices.(i); + let p = prices.(i) - !min_p in + left.(i) <- if p > left.(i - 1) then p else left.(i - 1) + done; + let right = Array.make n 0 in + let max_p = ref prices.(n - 1) in + for i = n - 2 downto 0 do + if prices.(i) > !max_p then max_p := prices.(i); + let p = !max_p - prices.(i) in + right.(i) <- if p > right.(i + 1) then p else right.(i + 1) + done; + let best = ref 0 in + for i = 0 to n - 1 do + let total = left.(i) + right.(i) in + if total > !best then best := total + done; + !best + end + +;; + +max_profit_two [| 3; 3; 5; 0; 0; 3; 1; 4 |] diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index caf5e05f..8a8699e8 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,16 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-11 Phase 5.1 — stock_two.ml baseline (max stock profit + with at most 2 transactions on [3;3;5;0;0;3;1;4] = 6). Two-pass + DP: left[i] = max single-transaction profit in prices[0..i] + (forward, tracking running min), right[i] = max single-transaction + profit in prices[i..n-1] (backward, tracking running max). Final + answer = max over i of left[i]+right[i], partitioning at the + best split. Optimal: buy@3, sell@5 (profit 2); buy@0, sell@4 + (profit 4); total = 6. Tests parallel forward + backward passes + on parallel DP arrays, mixed ref+array state, downto + ascending + scans on same data. 190 baseline programs total. - 2026-05-11 Phase 5.1 — house_robber.ml baseline (linear-DP max non-adjacent-sum on [2;7;9;3;1;5;8;6] = 22). dp[i] = max(dp[i-2]+houses[i], dp[i-1]). Optimal pick {7, 9, 5, 8} but From 3f00e6257754c2307a8320679b3ea892e6cb4ed6 Mon Sep 17 00:00:00 2001 From: giles Date: Mon, 11 May 2026 03:54:52 +0000 Subject: [PATCH 286/298] ocaml: phase 5.1 count_paths_dag.ml baseline (source-to-sink paths = 3) Count source-to-sink paths in a DAG via Kahn's topological sort plus accumulation: paths[source] = 1 for u in topological order: for v in adj[u]: paths[v] += paths[u] Same 6-node DAG as topo_sort.ml: 0 -> {1, 2} 1 -> {3} 2 -> {3, 4} 3 -> {5} 4 -> {5} The three witnesses 0 -> 5: 0 -> 1 -> 3 -> 5 0 -> 2 -> 3 -> 5 0 -> 2 -> 4 -> 5 Tests Queue-driven Kahn order + List.rev to recover topological order, module-level mutable arrays (in_deg, paths), accumulation in topological traversal. 191 baseline programs total. --- lib/ocaml/baseline/count_paths_dag.ml | 42 +++++++++++++++++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 11 +++++++ 3 files changed, 54 insertions(+) create mode 100644 lib/ocaml/baseline/count_paths_dag.ml diff --git a/lib/ocaml/baseline/count_paths_dag.ml b/lib/ocaml/baseline/count_paths_dag.ml new file mode 100644 index 00000000..a7357dd1 --- /dev/null +++ b/lib/ocaml/baseline/count_paths_dag.ml @@ -0,0 +1,42 @@ +let n = 6 +let adj = [| + [1; 2]; + [3]; + [3; 4]; + [5]; + [5]; + [] +|] + +let in_deg = Array.make n 0 +let paths = Array.make n 0 + +let count_paths () = + for u = 0 to n - 1 do + List.iter (fun v -> in_deg.(v) <- in_deg.(v) + 1) adj.(u) + done; + let order = ref [] in + let q = Queue.create () in + for v = 0 to n - 1 do + if in_deg.(v) = 0 then Queue.push v q + done; + while not (Queue.is_empty q) do + let u = Queue.pop q in + order := u :: !order; + List.iter (fun v -> + in_deg.(v) <- in_deg.(v) - 1; + if in_deg.(v) = 0 then Queue.push v q + ) adj.(u) + done; + paths.(0) <- 1; + let topo = List.rev !order in + List.iter (fun u -> + List.iter (fun v -> + paths.(v) <- paths.(v) + paths.(u) + ) adj.(u) + ) topo; + paths.(n - 1) + +;; + +count_paths () diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 36865605..3df8f632 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -31,6 +31,7 @@ "coin_change.ml": 6, "coin_min.ml": 6, "count_change.ml": 406, + "count_paths_dag.ml": 3, "count_inversions.ml": 12, "count_palindromes.ml": 9, "count_subarrays_k.ml": 7, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 8a8699e8..bb6f2b7c 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,17 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-11 Phase 5.1 — count_paths_dag.ml baseline (count + source-to-sink paths in the same 6-node DAG as topo_sort.ml, + paths 0→5 = 3). Topological sort via Kahn's (BFS), then relax + edges in topological order: paths[v] += paths[u] for each edge + u→v. paths[source] = 1. The three witnesses: + 0 → 1 → 3 → 5 + 0 → 2 → 3 → 5 + 0 → 2 → 4 → 5 + Tests Queue-driven Kahn order + List.rev to topological order, + module-level mutable arrays (in_deg / paths), accumulation in + topological traversal. 191 baseline programs total. - 2026-05-11 Phase 5.1 — stock_two.ml baseline (max stock profit with at most 2 transactions on [3;3;5;0;0;3;1;4] = 6). Two-pass DP: left[i] = max single-transaction profit in prices[0..i] From 90ba37ecc8c6568788966786dc55fbca6e38aec4 Mon Sep 17 00:00:00 2001 From: giles Date: Mon, 11 May 2026 04:04:42 +0000 Subject: [PATCH 287/298] ocaml: phase 5.1 activity_select.ml baseline (greedy non-overlap = 4) Activity selection by earliest end time. Manual bubble sort over (start, end) tuple array, then sweep accepting whenever the next start >= last_end. intervals: (1,4) (3,5) (0,6) (5,7) (3,8) (5,9) (6,10) (8,11) (8,12) (2,13) (12,14) selection: (1,4) -> (5,7) -> (8,11) -> (12,14) = 4 activities Tests double-loop bubble sort on tuple array with let-pattern destructure for swap-key extraction, in-place swap of tuple cells. 192 baseline programs total. --- lib/ocaml/baseline/activity_select.ml | 31 +++++++++++++++++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 9 ++++++++ 3 files changed, 41 insertions(+) create mode 100644 lib/ocaml/baseline/activity_select.ml diff --git a/lib/ocaml/baseline/activity_select.ml b/lib/ocaml/baseline/activity_select.ml new file mode 100644 index 00000000..77bb6114 --- /dev/null +++ b/lib/ocaml/baseline/activity_select.ml @@ -0,0 +1,31 @@ +let max_nonoverlap intervals = + let arr = Array.of_list intervals in + let n = Array.length arr in + let sorted = Array.make n (0, 0) in + for i = 0 to n - 1 do + sorted.(i) <- arr.(i) + done; + for i = 0 to n - 1 do + for j = 0 to n - 2 - i do + let (_, e1) = sorted.(j) in + let (_, e2) = sorted.(j + 1) in + if e1 > e2 then begin + let t = sorted.(j) in + sorted.(j) <- sorted.(j + 1); + sorted.(j + 1) <- t + end + done + done; + let count = ref 0 in + let last_end = ref (-1000000) in + for i = 0 to n - 1 do + let (s, e) = sorted.(i) in + if s >= !last_end then begin + count := !count + 1; + last_end := e + end + done; + !count +;; + +max_nonoverlap [(1, 4); (3, 5); (0, 6); (5, 7); (3, 8); (5, 9); (6, 10); (8, 11); (8, 12); (2, 13); (12, 14)] diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 3df8f632..b98d1d25 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -1,5 +1,6 @@ { "abundant.ml": 21, + "activity_select.ml": 4, "ackermann.ml": 125, "adler32.ml": 300286872, "anagram_check.ml": 2, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index bb6f2b7c..1b017130 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,15 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-11 Phase 5.1 — activity_select.ml baseline (greedy + earliest-end-time activity selection on 11 intervals → max + non-overlapping 4). Sort intervals by end time (bubble sort to + avoid relying on List.sort with tuple comparator), then sweep + and accept whenever start ≥ last_end. Optimal selection: + (1,4) → (5,7) → (8,11) → (12,14) = 4 activities. Tests double- + loop bubble sort on tuple array with let-pattern destructure + for swap key extraction, in-place swap of tuple cells via temp. + 192 baseline programs total. - 2026-05-11 Phase 5.1 — count_paths_dag.ml baseline (count source-to-sink paths in the same 6-node DAG as topo_sort.ml, paths 0→5 = 3). Topological sort via Kahn's (BFS), then relax From bf468e5ec393bbd39da94a73ae3f85f230767db5 Mon Sep 17 00:00:00 2001 From: giles Date: Mon, 11 May 2026 04:14:33 +0000 Subject: [PATCH 288/298] ocaml: phase 5.1 min_meeting_rooms.ml baseline (8 meetings, min 4 rooms) Sweep-line algorithm via separately-sorted starts / ends arrays: while i < n do if starts[i] < ends[j] then begin busy++; rooms = max; i++ end else begin busy--; j++ end done intervals: (0,30) (5,10) (15,20) (10,25) (5,12) (20,35) (0,5) (8,18) At time 8, meetings (0,30), (5,10), (5,12), (8,18) are all active simultaneously -> answer = 4. Tests local helper bound via let (`let bubble a = ...`) for in-place sort, dual-pointer sweep on parallel ordered event streams. 193 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/min_meeting_rooms.ml | 40 +++++++++++++++++++++++++ plans/ocaml-on-sx.md | 9 ++++++ 3 files changed, 50 insertions(+) create mode 100644 lib/ocaml/baseline/min_meeting_rooms.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index b98d1d25..908d4325 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -121,6 +121,7 @@ "mortgage.ml": 1073, "mst_kruskal.ml": 11, "merge_intervals.ml": 12, + "min_meeting_rooms.ml": 4, "merge_sort.ml": 44, "merge_two.ml": 441, "min_cost_path.ml": 12, diff --git a/lib/ocaml/baseline/min_meeting_rooms.ml b/lib/ocaml/baseline/min_meeting_rooms.ml new file mode 100644 index 00000000..2a4741ba --- /dev/null +++ b/lib/ocaml/baseline/min_meeting_rooms.ml @@ -0,0 +1,40 @@ +let min_rooms intervals = + let n = List.length intervals in + let arr = Array.of_list intervals in + let starts = Array.make n 0 in + let ends = Array.make n 0 in + for i = 0 to n - 1 do + let (s, e) = arr.(i) in + starts.(i) <- s; + ends.(i) <- e + done; + let bubble a = + for i = 0 to n - 1 do + for j = 0 to n - 2 - i do + if a.(j) > a.(j + 1) then begin + let t = a.(j) in + a.(j) <- a.(j + 1); + a.(j + 1) <- t + end + done + done + in + bubble starts; + bubble ends; + let rooms = ref 0 in + let busy = ref 0 in + let i = ref 0 and j = ref 0 in + while !i < n do + if starts.(!i) < ends.(!j) then begin + busy := !busy + 1; + if !busy > !rooms then rooms := !busy; + i := !i + 1 + end else begin + busy := !busy - 1; + j := !j + 1 + end + done; + !rooms +;; + +min_rooms [(0, 30); (5, 10); (15, 20); (10, 25); (5, 12); (20, 35); (0, 5); (8, 18)] diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 1b017130..1adef748 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,15 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-11 Phase 5.1 — min_meeting_rooms.ml baseline (sweep-line + for min concurrent meetings on 8 intervals = 4). Separate starts + and ends arrays sorted independently, then a two-pointer sweep: + on start Date: Mon, 11 May 2026 04:24:29 +0000 Subject: [PATCH 289/298] ocaml: phase 5.1 min_subarr_target.ml baseline (min subarray sum >= 7 = 2) Classic two-pointer / sliding window: expand right, then shrink left while the window still satisfies the >= constraint, recording the smallest valid length. for r = 0 to n - 1 do sum := !sum + arr.(r); while !sum >= target do ... record (r - !l + 1) if smaller ... sum := !sum - arr.(!l); l := !l + 1 done done For [2; 3; 1; 2; 4; 3], target 7 -> window [4, 3] of length 2. Sentinel n+1 marks "not found"; final guard reduces to 0. Tests for + inner while shrinking loop, ref-tracked sum updated on both expansion and contraction. 194 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/min_subarr_target.ml | 19 +++++++++++++++++++ plans/ocaml-on-sx.md | 8 ++++++++ 3 files changed, 28 insertions(+) create mode 100644 lib/ocaml/baseline/min_subarr_target.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 908d4325..7c10507e 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -125,6 +125,7 @@ "merge_sort.ml": 44, "merge_two.ml": 441, "min_cost_path.ml": 12, + "min_subarr_target.ml": 2, "module_use.ml": 3, "monotonic.ml": 4, "newton_sqrt.ml": 1414, diff --git a/lib/ocaml/baseline/min_subarr_target.ml b/lib/ocaml/baseline/min_subarr_target.ml new file mode 100644 index 00000000..14469291 --- /dev/null +++ b/lib/ocaml/baseline/min_subarr_target.ml @@ -0,0 +1,19 @@ +let min_subarr_sum_at_least arr target = + let n = Array.length arr in + let best = ref (n + 1) in + let sum = ref 0 in + let l = ref 0 in + for r = 0 to n - 1 do + sum := !sum + arr.(r); + while !sum >= target do + let len = r - !l + 1 in + if len < !best then best := len; + sum := !sum - arr.(!l); + l := !l + 1 + done + done; + if !best > n then 0 else !best + +;; + +min_subarr_sum_at_least [| 2; 3; 1; 2; 4; 3 |] 7 diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 1adef748..d1603223 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,14 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-11 Phase 5.1 — min_subarr_target.ml baseline (sliding- + window min subarray with sum ≥ target on [2;3;1;2;4;3] target=7 + = 2). Two-pointer: expand right, then shrink left while sum + stays ≥ target, recording the min length seen. Optimal window + is [4, 3] (positions 4-5) with sum 7, length 2. Tests `for` + + inner `while` shrinking loop, ref-tracked sum updated on both + expansion and contraction, sentinel `n + 1` for "not found". + 194 baseline programs total. - 2026-05-11 Phase 5.1 — min_meeting_rooms.ml baseline (sweep-line for min concurrent meetings on 8 intervals = 4). Separate starts and ends arrays sorted independently, then a two-pointer sweep: From 67ece98ba1caadc4d5694ec56c71cf76e937cf82 Mon Sep 17 00:00:00 2001 From: giles Date: Mon, 11 May 2026 04:34:40 +0000 Subject: [PATCH 290/298] ocaml: phase 5.1 task_scheduler.ml baseline ("AAABBC" cooldown 2 -> 7) Task-scheduler closed-form min total intervals: m = max letter frequency k = number of letters tied at frequency m answer = max((m - 1) * (n + 1) + k, total_tasks) For "AAABBC" with cooldown n = 2: freq A = 3, freq B = 2, freq C = 1 -> m = 3, k = 1 formula = (3 - 1) * (2 + 1) + 1 = 7 total tasks = 6 answer = 7 Witness schedule: A, B, C, A, B, idle, A. Tests String.iter with side-effecting count update via Char.code arithmetic, fixed-size 26-bucket histogram. 195 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/task_scheduler.ml | 18 ++++++++++++++++++ plans/ocaml-on-sx.md | 9 +++++++++ 3 files changed, 28 insertions(+) create mode 100644 lib/ocaml/baseline/task_scheduler.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 7c10507e..85242cde 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -173,6 +173,7 @@ "stock_two.ml": 6, "subseq_check.ml": 3, "tail_factorial.ml": 479001600, + "task_scheduler.ml": 7, "tarjan_scc.ml": 4, "subset_sum.ml": 8, "tic_tac_toe.ml": 1, diff --git a/lib/ocaml/baseline/task_scheduler.ml b/lib/ocaml/baseline/task_scheduler.ml new file mode 100644 index 00000000..62063c91 --- /dev/null +++ b/lib/ocaml/baseline/task_scheduler.ml @@ -0,0 +1,18 @@ +let task_intervals tasks n = + let counts = Array.make 26 0 in + String.iter (fun c -> counts.(Char.code c - Char.code 'A') <- counts.(Char.code c - Char.code 'A') + 1) tasks; + let max_c = ref 0 in + for i = 0 to 25 do + if counts.(i) > !max_c then max_c := counts.(i) + done; + let max_n = ref 0 in + for i = 0 to 25 do + if counts.(i) = !max_c then max_n := !max_n + 1 + done; + let intervals = (!max_c - 1) * (n + 1) + !max_n in + let total = String.length tasks in + if intervals > total then intervals else total + +;; + +task_intervals "AAABBC" 2 diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index d1603223..5135c066 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,15 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-11 Phase 5.1 — task_scheduler.ml baseline (task cooldown + formula, "AAABBC" with n=2 → 7 intervals). Counts each letter, + finds max frequency `m` and the number of letters that hit that + max `k`. Formula: `(m-1)·(n+1) + k`, taking the larger of that + and the total task count when interleaving fills the schedule. + Witness: A,B,C,A,B,idle,A satisfies cooldown 2 between A→A and + B→B. Tests String.iter with side-effecting closure (count + histogram update via Char.code arithmetic). 195 baseline + programs total. - 2026-05-11 Phase 5.1 — min_subarr_target.ml baseline (sliding- window min subarray with sum ≥ target on [2;3;1;2;4;3] target=7 = 2). Two-pointer: expand right, then shrink left while sum From b240408a4cd3249fd0422d88955dea408a1c7fae Mon Sep 17 00:00:00 2001 From: giles Date: Mon, 11 May 2026 04:44:37 +0000 Subject: [PATCH 291/298] ocaml: phase 5.1 bs_rotated.ml baseline (rotated array search, encoded -66) Binary search in a rotated sorted array. Standard sorted-half test at each step: if arr.(lo) <= arr.(mid) then left half [lo, mid] is sorted -> check whether target is in it else right half [mid, hi] is sorted -> check whether target is in it For [4; 5; 6; 7; 0; 1; 2]: search 0 -> index 4 search 7 -> index 3 search 3 -> -1 (absent) Encoded fingerprint: 4 + 3*10 + (-1)*100 = -66. First baseline returning a negative top-level value; the runner uses literal grep -qF so leading minus parses fine. 196 baseline programs total. --- lib/ocaml/baseline/bs_rotated.ml | 25 +++++++++++++++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 10 ++++++++++ 3 files changed, 36 insertions(+) create mode 100644 lib/ocaml/baseline/bs_rotated.ml diff --git a/lib/ocaml/baseline/bs_rotated.ml b/lib/ocaml/baseline/bs_rotated.ml new file mode 100644 index 00000000..87978aeb --- /dev/null +++ b/lib/ocaml/baseline/bs_rotated.ml @@ -0,0 +1,25 @@ +let bs_rotated arr target = + let lo = ref 0 in + let hi = ref (Array.length arr - 1) in + let result = ref (-1) in + while !lo <= !hi && !result = -1 do + let mid = (!lo + !hi) / 2 in + if arr.(mid) = target then result := mid + else if arr.(!lo) <= arr.(mid) then begin + if target >= arr.(!lo) && target < arr.(mid) then + hi := mid - 1 + else + lo := mid + 1 + end else begin + if target > arr.(mid) && target <= arr.(!hi) then + lo := mid + 1 + else + hi := mid - 1 + end + done; + !result + +;; + +let a = [| 4; 5; 6; 7; 0; 1; 2 |] in +bs_rotated a 0 + bs_rotated a 7 * 10 + bs_rotated a 3 * 100 diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 85242cde..a0d601ac 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -23,6 +23,7 @@ "btree.ml": 39, "brainfuck.ml": 75, "bs_bounds.ml": 3211, + "bs_rotated.ml": -66, "bsearch.ml": 7, "caesar.ml": 215, "calc.ml": 13, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 5135c066..ae9dfdc7 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,16 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-11 Phase 5.1 — bs_rotated.ml baseline (binary search in + rotated sorted array; encoded result -66). For [4;5;6;7;0;1;2]: + - search 0 → index 4 + - search 7 → index 3 + - search 3 → −1 (not present) + Encoded: 4 + 3*10 + (-1)*100 = -66. Each step decides which half + is sorted by comparing arr[lo] vs arr[mid], then checks whether + the target falls in that sorted half. First baseline with a + negative top-level result; test runner uses literal `grep -qF` + so the leading minus is fine. 196 baseline programs total. - 2026-05-11 Phase 5.1 — task_scheduler.ml baseline (task cooldown formula, "AAABBC" with n=2 → 7 intervals). Counts each letter, finds max frequency `m` and the number of letters that hit that From 230f803abb92020bc27442334c872f3601fd702f Mon Sep 17 00:00:00 2001 From: giles Date: Mon, 11 May 2026 04:54:36 +0000 Subject: [PATCH 292/298] ocaml: phase 5.1 count_bits.ml baseline (sum popcount 0..100 = 319) DP recurrence for popcount that avoids host bitwise operations: result[i] = result[i / 2] + (i mod 2) Drops the low bit (i / 2 stands in for i lsr 1) and adds it back if it was 1 (i mod 2 stands in for i land 1). sum over 0..100 of popcount(i) = 319 Tests pure-arithmetic popcount, accumulating ref + DP array, classic look-back to half-index pattern. 197 baseline programs total. --- lib/ocaml/baseline/count_bits.ml | 14 ++++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 7 +++++++ 3 files changed, 22 insertions(+) create mode 100644 lib/ocaml/baseline/count_bits.ml diff --git a/lib/ocaml/baseline/count_bits.ml b/lib/ocaml/baseline/count_bits.ml new file mode 100644 index 00000000..a395c2f5 --- /dev/null +++ b/lib/ocaml/baseline/count_bits.ml @@ -0,0 +1,14 @@ +let count_bits n = + let result = Array.make (n + 1) 0 in + for i = 1 to n do + result.(i) <- result.(i / 2) + (i mod 2) + done; + let sum = ref 0 in + for i = 0 to n do + sum := !sum + result.(i) + done; + !sum + +;; + +count_bits 100 diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index a0d601ac..7259d399 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -32,6 +32,7 @@ "convex_hull.ml": 5, "coin_change.ml": 6, "coin_min.ml": 6, + "count_bits.ml": 319, "count_change.ml": 406, "count_paths_dag.ml": 3, "count_inversions.ml": 12, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index ae9dfdc7..943d0d44 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-11 Phase 5.1 — count_bits.ml baseline (sum of popcount for + 0..100 = 319). DP recurrence: popcount(i) = popcount(i/2) + + (i mod 2) — drop the low bit and recurse, adding back the bit + if it was 1. Avoids needing host bitwise ops (i mod 2 stands in + for `i land 1`, i/2 for `i lsr 1`). Sum over 0..100 = 319. + Tests pure-arithmetic popcount, accumulating ref + array. + 197 baseline programs total. - 2026-05-11 Phase 5.1 — bs_rotated.ml baseline (binary search in rotated sorted array; encoded result -66). For [4;5;6;7;0;1;2]: - search 0 → index 4 From 8ab2f8061507be8561b955281dce66561dd00164 Mon Sep 17 00:00:00 2001 From: giles Date: Mon, 11 May 2026 05:04:37 +0000 Subject: [PATCH 293/298] ocaml: phase 5.1 daily_temperatures.ml baseline (sum of waits = 10) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Monotonic decreasing stack — for each day i, pop entries from the stack whose temperature is strictly less than today's; their answer is (i - popped_index). temps = [73; 74; 75; 71; 69; 72; 76; 73] answer = [ 1; 1; 4; 2; 1; 1; 0; 0] sum = 10 Complementary to next_greater.ml (iter 256) — same monotonic-stack skeleton but stores the distance to the next greater element rather than its value. Tests `match !stack with | top :: rest when …` pattern with guard inside a while-cont-flag loop. 198 baseline programs total. --- lib/ocaml/baseline/daily_temperatures.ml | 24 ++++++++++++++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 10 ++++++++++ 3 files changed, 35 insertions(+) create mode 100644 lib/ocaml/baseline/daily_temperatures.ml diff --git a/lib/ocaml/baseline/daily_temperatures.ml b/lib/ocaml/baseline/daily_temperatures.ml new file mode 100644 index 00000000..8996168b --- /dev/null +++ b/lib/ocaml/baseline/daily_temperatures.ml @@ -0,0 +1,24 @@ +let daily_temperatures temps = + let n = Array.length temps in + let answer = Array.make n 0 in + let stack = ref [] in + for i = 0 to n - 1 do + let cont = ref true in + while !cont do + match !stack with + | top :: rest when temps.(top) < temps.(i) -> + answer.(top) <- i - top; + stack := rest + | _ -> cont := false + done; + stack := i :: !stack + done; + let sum = ref 0 in + for i = 0 to n - 1 do + sum := !sum + answer.(i) + done; + !sum + +;; + +daily_temperatures [| 73; 74; 75; 71; 69; 72; 76; 73 |] diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 7259d399..33932e5d 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -39,6 +39,7 @@ "count_palindromes.ml": 9, "count_subarrays_k.ml": 7, "csv.ml": 10, + "daily_temperatures.ml": 10, "egg_drop.ml": 8, "dijkstra.ml": 7, "dp_word_break.ml": 4, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 943d0d44..defa645b 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,16 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-11 Phase 5.1 — daily_temperatures.ml baseline (monotonic + decreasing stack of waiting days until warmer; sum over + [73;74;75;71;69;72;76;73] = 10). For each day i, pop all stack + entries whose temperature is strictly less than today's, + recording `i - top` for each. Result vector [1,1,4,2,1,1,0,0] + sums to 10. Complementary to next_greater.ml — same monotonic- + stack skeleton but stores the distance to the next greater + element instead of its value. Tests pattern `match !stack with + | top :: rest when condition -> … | _ -> exit` inside while. + 198 baseline programs total. - 2026-05-11 Phase 5.1 — count_bits.ml baseline (sum of popcount for 0..100 = 319). DP recurrence: popcount(i) = popcount(i/2) + (i mod 2) — drop the low bit and recurse, adding back the bit From 3ccce58e0a207a570050b3583c609633688104b7 Mon Sep 17 00:00:00 2001 From: giles Date: Mon, 11 May 2026 05:14:47 +0000 Subject: [PATCH 294/298] ocaml: phase 5.1 unique_paths_obs.ml baseline (4x4 grid w/ obstacles = 3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Standard 2D unique-paths DP with obstacles gating each cell: dp[i][j] = if grid[i][j] = 1 then 0 else dp[i-1][j] + dp[i][j-1] Grid (1s are obstacles): . . . . . # . . . . . # # . . . dp: 1 1 1 1 1 0 1 2 1 1 2 0 0 1 3 3 Returns dp[3][3] = 3. Complements grid_paths.ml (no-obstacles version) — same DP shape but obstacles zero out cells and reshape the path count. 199 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/unique_paths_obs.ml | 28 ++++++++++++++++++++++++++ plans/ocaml-on-sx.md | 6 ++++++ 3 files changed, 35 insertions(+) create mode 100644 lib/ocaml/baseline/unique_paths_obs.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 33932e5d..025653bd 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -196,6 +196,7 @@ "triangle_div.ml": 120, "twosum.ml": 5, "union_find.ml": 4, + "unique_paths_obs.ml": 3, "unique_set.ml": 9, "validate.ml": 417, "word_count.ml": 3 diff --git a/lib/ocaml/baseline/unique_paths_obs.ml b/lib/ocaml/baseline/unique_paths_obs.ml new file mode 100644 index 00000000..1a3db496 --- /dev/null +++ b/lib/ocaml/baseline/unique_paths_obs.ml @@ -0,0 +1,28 @@ +let h = 4 +let w = 4 +let grid = [| + [| 0; 0; 0; 0 |]; + [| 0; 1; 0; 0 |]; + [| 0; 0; 0; 1 |]; + [| 1; 0; 0; 0 |] +|] + +let paths_with_obs () = + let dp = Array.init h (fun _ -> Array.make w 0) in + if grid.(0).(0) = 0 then dp.(0).(0) <- 1; + for j = 1 to w - 1 do + if grid.(0).(j) = 0 then dp.(0).(j) <- dp.(0).(j - 1) + done; + for i = 1 to h - 1 do + if grid.(i).(0) = 0 then dp.(i).(0) <- dp.(i - 1).(0) + done; + for i = 1 to h - 1 do + for j = 1 to w - 1 do + if grid.(i).(j) = 0 then + dp.(i).(j) <- dp.(i - 1).(j) + dp.(i).(j - 1) + done + done; + dp.(h - 1).(w - 1) +;; + +paths_with_obs () diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index defa645b..50b4dedc 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,12 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-11 Phase 5.1 — unique_paths_obs.ml baseline (count + monotone paths in 4×4 grid with obstacles at (1,1),(2,3),(3,0) + = 3). Standard 2D DP with obstacle gating: dp[i][j] = dp[i-1][j] + + dp[i][j-1] when grid[i][j]=0, else 0. Complements grid_paths.ml + (no-obstacles version): the same DP but obstacles zero out + cells, reshaping the path count. 199 baseline programs total. - 2026-05-11 Phase 5.1 — daily_temperatures.ml baseline (monotonic decreasing stack of waiting days until warmer; sum over [73;74;75;71;69;72;76;73] = 10). For each day i, pop all stack From fad81e0b0c89896f2aa222081ecadbadbd243649 Mon Sep 17 00:00:00 2001 From: giles Date: Mon, 11 May 2026 05:25:03 +0000 Subject: [PATCH 295/298] ocaml: phase 5.1 combinations.ml baseline (C(9, 4) = 126) Pascal-recursion combination enumerator: let rec choose k xs = if k = 0 then [[]] else match xs with | [] -> [] | h :: rest -> List.map (fun c -> h :: c) (choose (k - 1) rest) @ choose k rest C(9, 4) = |choose 4 [1; ...; 9]| = 126 Tests pure-functional enumeration with List.map + closure over h, @ append, [] | h :: rest pattern match on shrinking input. 200 baseline programs total -- milestone. --- lib/ocaml/baseline/combinations.ml | 12 ++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 8 ++++++++ 3 files changed, 21 insertions(+) create mode 100644 lib/ocaml/baseline/combinations.ml diff --git a/lib/ocaml/baseline/combinations.ml b/lib/ocaml/baseline/combinations.ml new file mode 100644 index 00000000..25a096d9 --- /dev/null +++ b/lib/ocaml/baseline/combinations.ml @@ -0,0 +1,12 @@ +let rec choose k xs = + if k = 0 then [[]] + else + match xs with + | [] -> [] + | h :: rest -> + List.map (fun c -> h :: c) (choose (k - 1) rest) + @ choose k rest + +;; + +List.length (choose 4 [1; 2; 3; 4; 5; 6; 7; 8; 9]) diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 025653bd..ed41cf21 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -29,6 +29,7 @@ "calc.ml": 13, "catalan.ml": 42, "closures.ml": 315, + "combinations.ml": 126, "convex_hull.ml": 5, "coin_change.ml": 6, "coin_min.ml": 6, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 50b4dedc..5a5fe298 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,14 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-11 Phase 5.1 — combinations.ml baseline (C(9, 4) = 126 + enumerated). Pascal-style recursive split: with first element h, + combinations either include h (recurse with k−1 on rest) or + exclude h (recurse with k on rest); identity choose k [] = []. + C(9, 4) = 126 = 9!/(4!·5!). Tests pure-functional combination + enumeration with `List.map` + closure over `h`, `@` append, + and `[] | h :: rest` pattern match on shrinking input. + 200 baseline programs total. - 2026-05-11 Phase 5.1 — unique_paths_obs.ml baseline (count monotone paths in 4×4 grid with obstacles at (1,1),(2,3),(3,0) = 3). Standard 2D DP with obstacle gating: dp[i][j] = dp[i-1][j] From c8327823eed1ad8bfb315e64c3b74ac21a9eca20 Mon Sep 17 00:00:00 2001 From: giles Date: Mon, 11 May 2026 05:34:46 +0000 Subject: [PATCH 296/298] ocaml: phase 5.1 min_jumps.ml baseline (greedy BFS-like min jumps = 4) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Greedy BFS-frontier style — track the farthest reach within the current jump's reachable range, and bump the jump counter when i runs into the current frontier: while !i < n - 1 do farthest := max(farthest, i + arr.(i)); if !i = !cur_end then begin jumps := !jumps + 1; cur_end := !farthest end; i := !i + 1 done For [2; 3; 1; 1; 2; 4; 2; 0; 1; 1] (n = 10), the optimal jump sequence 0 -> 1 -> 4 -> 5 -> 9 uses 4 jumps. Tests greedy-with-frontier pattern, three parallel refs (jumps, cur_end, farthest), mixed for-style index loop using ref. 201 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/min_jumps.ml | 22 ++++++++++++++++++++++ plans/ocaml-on-sx.md | 8 ++++++++ 3 files changed, 31 insertions(+) create mode 100644 lib/ocaml/baseline/min_jumps.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index ed41cf21..255be66a 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -129,6 +129,7 @@ "merge_sort.ml": 44, "merge_two.ml": 441, "min_cost_path.ml": 12, + "min_jumps.ml": 4, "min_subarr_target.ml": 2, "module_use.ml": 3, "monotonic.ml": 4, diff --git a/lib/ocaml/baseline/min_jumps.ml b/lib/ocaml/baseline/min_jumps.ml new file mode 100644 index 00000000..5bbef99a --- /dev/null +++ b/lib/ocaml/baseline/min_jumps.ml @@ -0,0 +1,22 @@ +let min_jumps arr = + let n = Array.length arr in + if n <= 1 then 0 + else begin + let jumps = ref 0 in + let cur_end = ref 0 in + let farthest = ref 0 in + let i = ref 0 in + while !i < n - 1 do + if !i + arr.(!i) > !farthest then farthest := !i + arr.(!i); + if !i = !cur_end then begin + jumps := !jumps + 1; + cur_end := !farthest + end; + i := !i + 1 + done; + !jumps + end + +;; + +min_jumps [| 2; 3; 1; 1; 2; 4; 2; 0; 1; 1 |] diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 5a5fe298..2f2ff25a 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,14 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-11 Phase 5.1 — min_jumps.ml baseline (greedy BFS-like + min jumps to reach end of [2;3;1;1;2;4;2;0;1;1] = 4). At each + position track the farthest reach within the current "BFS + layer"; when i reaches the layer end, bump jumps and extend to + farthest. Optimal jump sequence 0→1→4→5→9 = 4 jumps. Tests + greedy-with-frontier idiom, three parallel refs (jumps, cur_end, + farthest), mixed for-style index loop using ref. 201 baseline + programs total. - 2026-05-11 Phase 5.1 — combinations.ml baseline (C(9, 4) = 126 enumerated). Pascal-style recursive split: with first element h, combinations either include h (recurse with k−1 on rest) or From fed07059a3ccbc95ee944119ce30668902ac3391 Mon Sep 17 00:00:00 2001 From: giles Date: Mon, 11 May 2026 05:44:38 +0000 Subject: [PATCH 297/298] ocaml: phase 5.1 gas_station.ml baseline (circular tour start = 3) Classic O(n) greedy gas-station algorithm: walk once, tracking total = sum of (gas[i] - cost[i]) -- if negative, no answer curr = running tank since start -- on negative, advance start past i+1 and reset if total < 0 then -1 else start For gas = [1;2;3;4;5], cost = [3;4;5;1;2], unique start = 3. Tests `total` + `curr` parallel accumulators, reset-on-failure pattern. 202 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/gas_station.ml | 21 +++++++++++++++++++++ plans/ocaml-on-sx.md | 8 ++++++++ 3 files changed, 30 insertions(+) create mode 100644 lib/ocaml/baseline/gas_station.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 255be66a..467d28c6 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -74,6 +74,7 @@ "fib_mod.ml": 391360, "fraction.ml": 7, "frequency.ml": 5, + "gas_station.ml": 3, "gcd_lcm.ml": 60, "gray_code.ml": 136, "grep_count.ml": 3, diff --git a/lib/ocaml/baseline/gas_station.ml b/lib/ocaml/baseline/gas_station.ml new file mode 100644 index 00000000..8f18870b --- /dev/null +++ b/lib/ocaml/baseline/gas_station.ml @@ -0,0 +1,21 @@ +let gas_circuit gas cost = + let n = Array.length gas in + let total = ref 0 in + let curr = ref 0 in + let start = ref 0 in + for i = 0 to n - 1 do + let diff = gas.(i) - cost.(i) in + total := !total + diff; + curr := !curr + diff; + if !curr < 0 then begin + start := i + 1; + curr := 0 + end + done; + if !total < 0 then -1 else !start + +;; + +let gas = [| 1; 2; 3; 4; 5 |] in +let cost = [| 3; 4; 5; 1; 2 |] in +gas_circuit gas cost diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 2f2ff25a..c5c26809 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,14 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-11 Phase 5.1 — gas_station.ml baseline (find unique start + station for circular gas tour, gas=[1;2;3;4;5] cost=[3;4;5;1;2] + → start at index 3). Classic O(n) greedy: walk once tracking the + total tank delta (if negative, no solution → -1) and a running + tank that resets to 0 when it goes negative, advancing start + past the failing index. From station 3: tank 3,3+5,3+5−3,… + succeeds. Tests `total` + `curr` parallel accumulators, reset- + on-failure pattern. 202 baseline programs total. - 2026-05-11 Phase 5.1 — min_jumps.ml baseline (greedy BFS-like min jumps to reach end of [2;3;1;1;2;4;2;0;1;1] = 4). At each position track the farthest reach within the current "BFS From 0231bb46a6824260c2a1da50bb91b1968152a86c Mon Sep 17 00:00:00 2001 From: giles Date: Mon, 11 May 2026 05:54:39 +0000 Subject: [PATCH 298/298] ocaml: phase 5.1 trapping_rain.ml baseline (LeetCode trapped water = 6) Classic trapped-rain-water two-pass DP: left_max[i] = max(heights[0..i]) (forward sweep) right_max[i] = max(heights[i..n-1]) (downto sweep) water = sum over i of (min(left_max[i], right_max[i]) - heights[i]) For [0; 1; 0; 2; 1; 0; 1; 3; 2; 1; 2; 1]: water = 6. Tests dual sweep (forward + downto), array of running maxes, inline-if rhs of <- for running-max update (uses iter-236 fix for <- accepting if/match RHS). 203 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/trapping_rain.ml | 25 +++++++++++++++++++++++++ plans/ocaml-on-sx.md | 6 ++++++ 3 files changed, 32 insertions(+) create mode 100644 lib/ocaml/baseline/trapping_rain.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 467d28c6..4330a942 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -194,6 +194,7 @@ "sieve.ml": 15, "sum_squares.ml": 385, "tree_depth.ml": 4, + "trapping_rain.ml": 6, "triangle.ml": 11, "trie.ml": 6, "triangle_div.ml": 120, diff --git a/lib/ocaml/baseline/trapping_rain.ml b/lib/ocaml/baseline/trapping_rain.ml new file mode 100644 index 00000000..19b4e8a4 --- /dev/null +++ b/lib/ocaml/baseline/trapping_rain.ml @@ -0,0 +1,25 @@ +let trap heights = + let n = Array.length heights in + if n < 3 then 0 + else begin + let left_max = Array.make n 0 in + let right_max = Array.make n 0 in + left_max.(0) <- heights.(0); + for i = 1 to n - 1 do + left_max.(i) <- if heights.(i) > left_max.(i - 1) then heights.(i) else left_max.(i - 1) + done; + right_max.(n - 1) <- heights.(n - 1); + for i = n - 2 downto 0 do + right_max.(i) <- if heights.(i) > right_max.(i + 1) then heights.(i) else right_max.(i + 1) + done; + let water = ref 0 in + for i = 0 to n - 1 do + let min_lr = if left_max.(i) < right_max.(i) then left_max.(i) else right_max.(i) in + water := !water + min_lr - heights.(i) + done; + !water + end + +;; + +trap [| 0; 1; 0; 2; 1; 0; 1; 3; 2; 1; 2; 1 |] diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index c5c26809..889040fc 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,12 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-11 Phase 5.1 — trapping_rain.ml baseline (trapped rain + water over heights [0;1;0;2;1;0;1;3;2;1;2;1] = 6). Left-max and + right-max prefix arrays; at each index water = min(L,R) − h. + Tests dual sweep (forward + downto), array of running maxes, + inline-if rhs of `<-` for running-max update (uses iter-236 + fix). 203 baseline programs total. - 2026-05-11 Phase 5.1 — gas_station.ml baseline (find unique start station for circular gas tour, gas=[1;2;3;4;5] cost=[3;4;5;1;2] → start at index 3). Classic O(n) greedy: walk once tracking the