diff --git a/lib/ocaml/baseline/dp_word_break.ml b/lib/ocaml/baseline/dp_word_break.ml new file mode 100644 index 00000000..93990b01 --- /dev/null +++ b/lib/ocaml/baseline/dp_word_break.ml @@ -0,0 +1,27 @@ +let word_break s words = (* [word_break s words] is 1 when [s] can be written as a concatenation of entries of [words] (an entry may be used more than once), and 0 otherwise. *) + let n = String.length s in + let dp = Array.make (n + 1) false in (* dp.(i) is true when the first i characters of s can be segmented *) + dp.(0) <- true; (* base case: the empty string needs no words *) + for i = 1 to n do + List.iter (fun w -> (* try every dictionary word as the last piece ending at position i *) + let wl = String.length w in + if i >= wl && dp.(i - wl) then begin (* && short-circuits, so dp.(i - wl) is only read when i - wl >= 0 *) + let prefix = String.sub s (i - wl) wl in (* despite the name, these are the wl characters of s that end at position i *) + if prefix = w then dp.(i) <- true + end + ) words + done; + if dp.(n) then 1 else 0 (* int rather than bool so that count_ok can sum the results *) + +let count_ok strings words = (* [count_ok strings words] is the number of elements of [strings] for which [word_break] with [words] returns 1. *) + let count = ref 0 in + List.iter (fun s -> + count := !count + word_break s words + ) strings; + !count + +;; (* what follows is a bare top-level expression, not a definition; it evaluates to the count for the sample data below *) + +let dict = ["apple"; "pen"; "pine"; "pineapple"; "cats"; "cat"; "and"; "sand"; "dog"] in +let inputs = ["applepenapple"; "pineapplepenapple"; "catsanddog"; "catsandog"; "applesand"] in +count_ok inputs dict diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 19c2600c..bda63b8c 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -35,6 +35,7 @@ "csv.ml": 10, "egg_drop.ml": 8, "dijkstra.ml": 7, + "dp_word_break.ml": 4, "distinct_subseq.ml": 3, "exception_handle.ml": 4, "exception_user.ml": 26, diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 99f48089..1fe9fa28 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,14 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-11 Phase 5.1 — dp_word_break.ml baseline (word-break DP + over 5 strings with 9-word dictionary; 4 strings segmentable). + dp[i] = ∃ word w of length wl ≤ i with prefix s[i−wl..i]=w and + dp[i−wl]=true. Inputs: applepenapple, pineapplepenapple, + catsanddog (yes); catsandog (no — leftover "og"); applesand (yes). 
+ Tests bool-typed DP array, `String.sub s start len` substring + primitive, nested List.iter over dict inside for-loop over + positions, short-circuit + closure. 179 baseline programs total. - 2026-05-11 Phase 5.1 — histogram_area.ml baseline (largest rectangle in histogram [2;1;5;6;2;3] = 10). Linear-time stack algorithm: push indices while heights are non-decreasing; on