From 19d0ef0f38b4438e9d33814c0527e34970e98ed8 Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 06:34:13 +0000 Subject: [PATCH] ocaml: phase 5.1 rolling_hash.ml baseline (Rabin-Karp, 6 "abc" matches) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Polynomial rolling hash mod 1000003 with base 257: - precompute base^(m-1) - slide window updating hash in O(1) per step - verify hash match with O(m) memcmp to skip false positives rolling_match "abcabcabcabcabcabc" "abc" = 6 Six non-overlapping copies of "abc" at positions 0,3,6,9,12,15. Tests `for _ = 0 to m - 2 do … done` unused loop variable (uses underscore wildcard pattern), Char.code arithmetic, mod arithmetic with intermediate negative subtractions, complex nested if/begin branching with inner break-via-flag. 151 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/rolling_hash.ml | 38 ++++++++++++++++++++++++++++++ plans/ocaml-on-sx.md | 9 +++++++ 3 files changed, 48 insertions(+) create mode 100644 lib/ocaml/baseline/rolling_hash.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 6a9c8c82..6b361776 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -122,6 +122,7 @@ "quickselect.ml": 5, "quicksort.ml": 44, "roman.ml": 44, + "rolling_hash.ml": 6, "reverse_int.ml": 54329, "rpn.ml": 9, "run_decode.ml": 21, diff --git a/lib/ocaml/baseline/rolling_hash.ml b/lib/ocaml/baseline/rolling_hash.ml new file mode 100644 index 00000000..2b370cf9 --- /dev/null +++ b/lib/ocaml/baseline/rolling_hash.ml @@ -0,0 +1,38 @@ +let mod_p = 1000003 (* modulus applied to every hash value computed below *) +let base = 257 (* multiplier of the polynomial hash *) + +let rolling_match text pat = (* returns the number of start positions i in text at which pat occurs; i advances by one, so overlapping occurrences would each be counted *) + let n = String.length text in + let m = String.length pat in + if m > n then 0 (* pat is longer than text, so no position can match *) + else begin + let pat_hash = ref 0 in + let win_hash = ref 0 in + let high = ref 1 in (* weight of the leading window character: multiplied by base once per iteration of the next loop, i.e. m-1 times when m >= 1 *) + for _ = 0 to m - 2 do + high := (!high * base) mod mod_p + done; + for i = 0 to m - 1 do (* hash pat and the first m characters of text with the same recurrence *) + pat_hash := (!pat_hash * 
base + Char.code pat.[i]) mod mod_p; + win_hash := (!win_hash * base + Char.code text.[i]) mod mod_p + done; + let count = ref 0 in + for i = 0 to n - m do (* i is the start index of the current window *) + if !win_hash = !pat_hash then begin (* equal hashes only suggest a match, so the characters are compared as well *) + let ok = ref true in + for j = 0 to m - 1 do + if text.[i + j] <> pat.[j] then ok := false (* no early exit: the flag records the mismatch and the remaining j are still visited *) + done; + if !ok then count := !count + 1 + end; + if i < n - m then begin (* slide the window, except after the last start position *) + let drop = (Char.code text.[i] * !high) mod mod_p in (* contribution of the outgoing character text.[i] *) + win_hash := ((!win_hash - drop + mod_p) * base + Char.code text.[i + m]) mod mod_p (* adding mod_p keeps the difference non-negative before it is multiplied by base *) + end + done; + !count + end + +;; + +rolling_match "abcabcabcabcabcabc" "abc" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index eb1d0725..97c7934e 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,15 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — rolling_hash.ml baseline (Rabin-Karp + rolling hash for substring matching, count "abc" in + "abcabcabcabcabcabc" = 6). Polynomial hash mod 1000003 with + base 257; precompute base^(m-1), then slide window updating + hash in O(1) per step. Verify hash matches with O(m) memcmp to + avoid false positives. Tests `for _ = 0 to m - 2 do … done` + unused loop variable, char-code arithmetic, mod under negative + intermediate, complex nested if/begin/end branching. + 151 baseline programs total. - 2026-05-10 Phase 5.1 — huffman.ml baseline (Huffman tree weighted path length on letters {(5,a) (9,b) (12,c) (13,d) (16,e) (45,f)} = 224). Builds optimal prefix code by repeatedly merging the two