From bed374c9e196474c16d6aaede5474341df25ecf2 Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 07:06:29 +0000 Subject: [PATCH] ocaml: phase 5.1 tarjan_scc.ml baseline (8-node digraph, 4 SCCs) Tarjan's strongly-connected components in a single DFS using index/lowlink: graph (8 nodes, directed): 0 -> 1 -> 2 -> 0 (3-cycle) 2 -> 3 3 -> 4 4 -> 5 -> 6 -> 4 (3-cycle) 4 -> 7 SCCs: {0,1,2}, {3}, {4,5,6}, {7} = 4 components Module-level ref + array state (index_counter, index_arr, lowlink, on_stack, stack, scc_count). When lowlink(v) = index(v), pop from stack until v is removed; that's a complete SCC. Tests: recursive function with module-level mutable state, nested begin/end branches inside List.iter closure, inner `let rec pop ()` traversing a ref-of-list, pattern match on [] / h :: rest cons-list shape. 154 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/tarjan_scc.ml | 53 ++++++++++++++++++++++++++++++++ plans/ocaml-on-sx.md | 9 ++++++ 3 files changed, 63 insertions(+) create mode 100644 lib/ocaml/baseline/tarjan_scc.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 9f7e6bb4..42e161f2 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -135,6 +135,7 @@ "stable_unique.ml": 46, "subseq_check.ml": 3, "tail_factorial.ml": 479001600, + "tarjan_scc.ml": 4, "subset_sum.ml": 8, "tic_tac_toe.ml": 1, "topo_sort.ml": 6, diff --git a/lib/ocaml/baseline/tarjan_scc.ml b/lib/ocaml/baseline/tarjan_scc.ml new file mode 100644 index 00000000..6ad3f90e --- /dev/null +++ b/lib/ocaml/baseline/tarjan_scc.ml @@ -0,0 +1,53 @@ +let n = 8 + +let adj = [| + [1]; + [2]; + [0; 3]; + [4]; + [5; 7]; + [6]; + [4]; + [] +|] + +let index_counter = ref 0 +let stack = ref [] +let on_stack = Array.make n false +let index_arr = Array.make n (-1) +let lowlink = Array.make n 0 +let scc_count = ref 0 + +let rec strongconnect v = + index_arr.(v) <- !index_counter; + lowlink.(v) <- !index_counter; + 
index_counter := !index_counter + 1; + stack := v :: !stack; + on_stack.(v) <- true; + List.iter (fun w -> + if index_arr.(w) = -1 then begin + strongconnect w; + if lowlink.(w) < lowlink.(v) then lowlink.(v) <- lowlink.(w) + end else if on_stack.(w) then begin + if index_arr.(w) < lowlink.(v) then lowlink.(v) <- index_arr.(w) + end + ) adj.(v); + if lowlink.(v) = index_arr.(v) then begin + let rec pop () = + match !stack with + | [] -> () + | w :: rest -> + stack := rest; + on_stack.(w) <- false; + if w <> v then pop () + in + pop (); + scc_count := !scc_count + 1 + end + +;; + +for v = 0 to n - 1 do + if index_arr.(v) = -1 then strongconnect v +done; +!scc_count diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 5bb9b9bb..c881517c 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,15 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — tarjan_scc.ml baseline (Tarjan's + strongly-connected components on 8-node digraph → 4 SCCs). + Graph: 0→1→2→0 (cycle) plus 2→3, 3→4, 4→5→6→4 (cycle), 4→7. + SCCs: {0,1,2}, {3}, {4,5,6}, {7} = 4 components. Single DFS + with index/lowlink, on-stack flag, pop until root when + lowlink = index. Tests recursive functions with module-level + ref + array state, nested begin/end branches inside List.iter + closure, inner `let rec pop ()` walking ref-of-list, pattern + match on `[] | h :: rest`. 154 baseline programs total. - 2026-05-10 Phase 5.1 — lev_iter.ml baseline (iterative Levenshtein DP, sum of 5 distances = 16). Rolling-array DP (O(min(m,n)) space). Distances: kitten→sitting=3, saturday→