From 713d506bb892e8fab701f5a707b997ecece97366 Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 04:08:53 +0000 Subject: [PATCH] ocaml: phase 5.1 kmp.ml baseline (5 occurrences of "abab" in haystack) Knuth-Morris-Pratt linear-time string search: - kmp_table builds failure function in O(|pattern|) - kmp_search scans text once in O(|text|), counting matches - After a hit, k := t.(n-1) so overlapping matches still count kmp_search "abababcabababcababcc" "abab" = 5 Hits at positions 0, 2, 7, 9, 14 (overlapping at 0/2 and 7/9). Tests: nested while-inside-for, char inequality (<>), pat.[i] string indexing, Array.make 0, combined string + array indexing. 140 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/kmp.ml | 37 ++++++++++++++++++++++++++++++++ plans/ocaml-on-sx.md | 7 ++++++ 3 files changed, 45 insertions(+) create mode 100644 lib/ocaml/baseline/kmp.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index e6661889..ec5db123 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -79,6 +79,7 @@ "josephus.ml": 11, "json_pretty.ml": 24, "kadane.ml": 6, + "kmp.ml": 5, "lambda_calc.ml": 7, "majority_vote.ml": 4, "levenshtein.ml": 11, diff --git a/lib/ocaml/baseline/kmp.ml b/lib/ocaml/baseline/kmp.ml new file mode 100644 index 00000000..18787f2e --- /dev/null +++ b/lib/ocaml/baseline/kmp.ml @@ -0,0 +1,37 @@ +let kmp_table pat = + let n = String.length pat in + let t = Array.make n 0 in + let k = ref 0 in + for i = 1 to n - 1 do + while !k > 0 && pat.[!k] <> pat.[i] do + k := t.(!k - 1) + done; + if pat.[!k] = pat.[i] then k := !k + 1; + t.(i) <- !k + done; + t + +let kmp_search text pat = + let m = String.length text in + let n = String.length pat in + if n = 0 then 0 + else begin + let t = kmp_table pat in + let count = ref 0 in + let k = ref 0 in + for i = 0 to m - 1 do + while !k > 0 && pat.[!k] <> text.[i] do + k := t.(!k - 1) + done; + if pat.[!k] = 
text.[i] then k := !k + 1; + if !k = n then begin + count := !count + 1; + k := t.(n - 1) + end + done; + !count + end + +;; + +kmp_search "abababcabababcababcc" "abab" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 153bd948..3094e11d 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,13 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — kmp.ml baseline (KMP string search, count + occurrences of "abab" in "abababcabababcababcc" = 5). Two-phase + classic: build failure table in O(|pat|), then linear scan in O(|text|). + After a full match, set k := t.(n-1) so overlapping matches still + count (so "abab" hits at positions 0, 2, 7, 9, 14). Tests nested + while-inside-for, char comparison via `pat.[i]`, Array.make 0, + combined string + array indexing. 140 baseline programs total. - 2026-05-10 Phase 5.1 — union_find.ml baseline (disjoint-set union on n=10 with 6 unions → 4 components). Path-compressing find: recursively walks parent links, splices subtree onto root in place.