From 32aba1823db4fcf6474d973145a0424d52f8cb5d Mon Sep 17 00:00:00 2001
From: giles
Date: Sat, 9 May 2026 09:47:25 +0000
Subject: [PATCH] ocaml: phase 5.1 run_length.ml baseline (RLE, sum-of-counts = 11)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Run-length encoding via tail-recursive 4-arg accumulator:

  let rle xs =
    let rec aux xs cur n acc =
      match xs with
      | [] -> List.rev ((cur, n) :: acc)
      | h :: t ->
        if h = cur then aux t cur (n + 1) acc
        else aux t h 1 ((cur, n) :: acc)
    in
    match xs with
    | [] -> []
    | h :: t -> aux t h 1 []

  rle [1;1;1;2;2;3;3;3;3;1;1] = [(1,3);(2,2);(3,4);(1,2)]
  sum of counts = 11 (matches input length)

The sum-of-counts test verifies that the encoding preserves total
length — drops or duplicates would diverge. 44 baseline programs
total.
---
 lib/ocaml/baseline/expected.json |  1 +
 lib/ocaml/baseline/run_length.ml | 16 ++++++++++++++++
 plans/ocaml-on-sx.md             |  7 +++++++
 3 files changed, 24 insertions(+)
 create mode 100644 lib/ocaml/baseline/run_length.ml

diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json
index b86add6d..d4382a4c 100644
--- a/lib/ocaml/baseline/expected.json
+++ b/lib/ocaml/baseline/expected.json
@@ -36,6 +36,7 @@
   "quicksort.ml": 44,
   "roman.ml": 44,
   "rpn.ml": 9,
+  "run_length.ml": 11,
   "safe_div.ml": 20,
   "shuffle.ml": 55,
   "word_freq.ml": 8,
diff --git a/lib/ocaml/baseline/run_length.ml b/lib/ocaml/baseline/run_length.ml
new file mode 100644
index 00000000..c2ea6737
--- /dev/null
+++ b/lib/ocaml/baseline/run_length.ml
@@ -0,0 +1,16 @@
+let rle xs =
+  let rec aux xs cur n acc =
+    match xs with
+    | [] -> List.rev ((cur, n) :: acc)
+    | h :: t ->
+      if h = cur then aux t cur (n + 1) acc
+      else aux t h 1 ((cur, n) :: acc)
+  in
+  match xs with
+  | [] -> []
+  | h :: t -> aux t h 1 []
+
+;;
+
+List.fold_left (fun acc (_, n) -> acc + n) 0
+  (rle [1;1;1;2;2;3;3;3;3;1;1])
diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md
index 23cb7bd0..f0fd71d6 100644
--- a/plans/ocaml-on-sx.md
+++ b/plans/ocaml-on-sx.md
@@ -407,6 +407,13 @@ _Newest first._
   binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree *
   'a tree`) with insert + in-order traversal. Tests parametric ADT,
   recursive match, List.append, List.fold_left.
+- 2026-05-09 Phase 5.1 — run_length.ml baseline (run-length encoding,
+  sum of counts = 11). RLE encodes [1;1;1;2;2;3;3;3;3;1;1] as
+  [(1,3);(2,2);(3,4);(1,2)]. Sum-of-counts = 11 verifies that the
+  encoding preserves total length. Tail-recursive accumulator with
+  4-arg helper, two-arm dispatch on whether the next element matches
+  the current run head, List.rev to restore order, fold_left with
+  tuple-pattern fun. 44 baseline programs total.
 - 2026-05-09 Phase 5.1 — grep_count.ml baseline (substring-aware line
   filter, 3 lines match). Defines a recursive `str_contains` that
   walks the haystack with `String.sub` slices to find a needle