From 8a80bd3923a9a1719b63f4e2265a55b88db138ad Mon Sep 17 00:00:00 2001 From: giles Date: Mon, 11 May 2026 01:53:21 +0000 Subject: [PATCH] ocaml: phase 5.1 dp_word_break.ml baseline (4/5 strings segmentable) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Classic word-break DP — for each position i, check whether any dictionary word ends at i with a prior reachable position: dp[i] = exists w in dict with wl = String.length w, wl <= i and dp[i - wl] && String.sub s (i - wl) wl = w Dictionary: apple, pen, pine, pineapple, cats, cat, and, sand, dog Inputs: applepenapple yes (apple pen apple) pineapplepenapple yes (pineapple pen apple) catsanddog yes (cats and dog) catsandog no (no segmentation reaches the end) applesand yes (apple sand) Tests bool-typed Array, String.sub primitive, nested List.iter over the dict inside for-loop over end positions, closure capture of the outer dp. 179 baseline programs total. --- lib/ocaml/baseline/dp_word_break.ml | 27 +++++++++++++++++++++++++++ lib/ocaml/baseline/expected.json | 1 + plans/ocaml-on-sx.md | 8 ++++++++ 3 files changed, 36 insertions(+) create mode 100644 lib/ocaml/baseline/dp_word_break.ml diff --git a/lib/ocaml/baseline/dp_word_break.ml b/lib/ocaml/baseline/dp_word_break.ml new file mode 100644 index 00000000..93990b01 --- /dev/null +++ b/lib/ocaml/baseline/dp_word_break.ml @@ -0,0 +1,27 @@ +let word_break s words = + let n = String.length s in + let dp = Array.make (n + 1) false in + dp.(0) <- true; + for i = 1 to n do + List.iter (fun w -> + let wl = String.length w in + if i >= wl && dp.(i - wl) then begin + let prefix = String.sub s (i - wl) wl in + if prefix = w then dp.(i) <- true + end + ) words + done; + if dp.(n) then 1 else 0 + +let count_ok strings words = + let count = ref 0 in + List.iter (fun s -> + count := !count + word_break s words + ) strings; + !count + +;; + +let dict = ["apple"; "pen"; "pine"; "pineapple"; "cats"; "cat"; "and"; "sand"; "dog"] in +let inputs = ["applepenapple"; 
"pineapplepenapple"; "catsanddog"; "catsandog"; "applesand"] in +count_ok inputs dict diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 19c2600c..bda63b8c 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -35,6 +35,7 @@ "csv.ml": 10, "egg_drop.ml": 8, "dijkstra.ml": 7, + "dp_word_break.ml": 4, "distinct_subseq.ml": 3, "exception_handle.ml": 4, "exception_user.ml": 26, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 99f48089..1fe9fa28 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,14 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-11 Phase 5.1 — dp_word_break.ml baseline (word-break DP + over 5 strings with 9-word dictionary; 4 strings segmentable). + dp[i] = ∃ word w of length wl ≤ i with substring s[i−wl..i)=w and + dp[i−wl]=true. Inputs: applepenapple, pineapplepenapple, + catsanddog (yes); catsandog (no — leftover "og"); applesand (yes). + Tests bool-typed DP array, `String.sub s start len` substring + primitive, nested List.iter over dict inside for-loop over + positions, short-circuit + closure. 179 baseline programs total. - 2026-05-11 Phase 5.1 — histogram_area.ml baseline (largest rectangle in histogram [2;1;5;6;2;3] = 10). Linear-time stack algorithm: push indices while heights are non-decreasing; on