From 047ea62d430cafdf010b64f84be47ca77968607c Mon Sep 17 00:00:00 2001 From: giles Date: Mon, 11 May 2026 02:25:04 +0000 Subject: [PATCH] ocaml: phase 5.1 regex_simple.ml baseline (./* matcher, 7/28 match) Recursive regex matcher with Leetcode-style semantics: . matches any single character * matches zero or more of the preceding element let rec is_match s i p j = if j = String.length p then i = String.length s else let first = i < String.length s && (p.[j] = '.' || p.[j] = s.[i]) in if j + 1 < String.length p && p.[j+1] = '*' then is_match s i p (j + 2) (* skip * group *) || (first && is_match s (i + 1) p j) (* consume one *) else first && is_match s (i + 1) p (j + 1) Patterns vs texts: .a.b | aabb axb "" abcd abc aaabbbc x -> 1 match a.*b | aabb axb "" abcd abc aaabbbc x -> 2 matches x* | aabb axb "" abcd abc aaabbbc x -> 2 matches a*b*c | aabb axb "" abcd abc aaabbbc x -> 2 matches total = 7 Complements wildcard_match.ml which uses LIKE-style * / ?. 182 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/regex_simple.ml | 27 +++++++++++++++++++++++++++ plans/ocaml-on-sx.md | 12 ++++++++++++ 3 files changed, 40 insertions(+) create mode 100644 lib/ocaml/baseline/regex_simple.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 35371c95..29e29d25 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -149,6 +149,7 @@ "radix_sort.ml": 802002, "roman.ml": 44, "rolling_hash.ml": 6, + "regex_simple.ml": 7, "reverse_int.ml": 54329, "rpn.ml": 9, "run_decode.ml": 21, diff --git a/lib/ocaml/baseline/regex_simple.ml b/lib/ocaml/baseline/regex_simple.ml new file mode 100644 index 00000000..bcc692d6 --- /dev/null +++ b/lib/ocaml/baseline/regex_simple.ml @@ -0,0 +1,27 @@ +let rec is_match s i p j = + if j = String.length p then i = String.length s + else + let first = + i < String.length s + && (p.[j] = '.' 
|| p.[j] = s.[i]) + in + if j + 1 < String.length p && p.[j + 1] = '*' then + is_match s i p (j + 2) + || (first && is_match s (i + 1) p j) + else + first && is_match s (i + 1) p (j + 1) + +let count_match pats texts = + let count = ref 0 in + List.iter (fun p -> + List.iter (fun t -> + if is_match t 0 p 0 then count := !count + 1 + ) texts + ) pats; + !count + +;; + +let pats = [".a.b"; "a.*b"; "x*"; "a*b*c"] in +let texts = ["aabb"; "axb"; ""; "abcd"; "abc"; "aaabbbc"; "x"] in +count_match pats texts diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 6cc60e48..41d81acd 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,18 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-11 Phase 5.1 — regex_simple.ml baseline (recursive `.`/`*` + regex matcher; over 4×7 = 28 (pattern, text) combos, 7 match). + Cases per pattern: + .a.b — 1 match (aabb) + a.*b — 2 matches (aabb, axb) + x* — 2 matches ("" and "x") + a*b*c — 2 matches (abc, aaabbbc) + Two-position lookahead for `p.[j+1] = '*'` to decide between + zero-match and consume-one-char-and-retry. Complements + wildcard_match.ml (LIKE-style `*` / `?` semantics); this one + has Leetcode-style `.` (any char) and `*` (zero+ of c). + 182 baseline programs total. - 2026-05-11 Phase 5.1 — palindrome_part.ml baseline (minimum palindrome-partition cuts in "aabba" = 1). Two-phase DP: 1) `is_pal.(i).(j)` table via length-major iteration.