From 8188a82a587d8edb06656455bc0361ffdbcd267a Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 9 May 2026 03:01:28 +0000 Subject: [PATCH] ocaml: phase 6 List.sort upgraded to mergesort (+3 tests, 528 total) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous List.sort was O(n^2) insertion sort. Replaced with a straightforward mergesort: split slow fast -> stable halving: fast advances two cells per step, so the first floor(n/2) elements go left and the rest go right merge xs ys -> classic two-finger merge under cmp; ties take from the left list sort cmp xs -> base cases [], [x]; otherwise split + recursive sort on each half + merge Tuple destructuring on the split result is expressed via nested match — let-tuple-destructuring would be cleaner, but this works today. This benefits sort_uniq (which calls sort first), Set.Make.add via sort etc., and any user program using List.sort. Stable_sort is already aliased to sort, so the split must keep the halves in input order: an alternating odd/even split would let equal elements swap places. --- lib/ocaml/runtime.sx | 28 +++++++++++++++++++++++----- lib/ocaml/test.sh | 13 +++++++++++++ plans/ocaml-on-sx.md | 9 +++++++++ 3 files changed, 45 insertions(+), 5 deletions(-) diff --git a/lib/ocaml/runtime.sx b/lib/ocaml/runtime.sx index 24b86568..b1b9bf04 100644 --- a/lib/ocaml/runtime.sx +++ b/lib/ocaml/runtime.sx @@ -146,14 +146,32 @@ let rec sort cmp xs = begin - let rec insert x ys = - match ys with - | [] -> [x] - | h :: t -> if cmp x h <= 0 then x :: ys else h :: insert x t + let rec split slow fast = + match fast with + | _ :: _ :: fast' -> + (match slow with + | [] -> ([], []) + | x :: slow' -> + (match split slow' fast' with + | (a, b) -> (x :: a, b))) + | _ -> ([], slow) + in + let rec merge xs ys = + match xs with + | [] -> ys + | x :: xs' -> + (match ys with + | [] -> xs + | y :: ys' -> + if cmp x y <= 0 then x :: merge xs' (y :: ys') + else y :: merge (x :: xs') ys') in match xs with | [] -> [] - | h :: t -> insert h (sort cmp t) + | [x] -> [x] + | _ -> + (match split xs xs with + | (a, b) -> merge (sort cmp a) (sort cmp b)) end let stable_sort = sort diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index fe118bc5..ecd7e10e 100755 --- 
a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -1310,6 +1310,14 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 5054) (eval "(ocaml-run \"max_int + min_int\")") +;; ── List.sort mergesort ────────────────────────────────────── +(epoch 5060) +(eval "(ocaml-run \"List.sort compare [5;2;8;1;9;3;7;4;6]\")") +(epoch 5061) +(eval "(ocaml-run \"List.sort (fun a b -> b - a) [3;1;4;1;5]\")") +(epoch 5062) +(eval "(ocaml-run \"List.length (List.sort compare [9;8;7;6;5;4;3;2;1;0])\")") + EPOCHS OUTPUT=$(timeout 360 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -2081,6 +2089,11 @@ check 5052 "Int.rem 17 5" '2' check 5053 "Int.compare 5 3" '1' check 5054 "max_int + min_int (host int)" '0' +# ── List.sort mergesort ───────────────────────────────────────── +check 5060 "sort 9-element list" '(1 2 3 4 5 6 7 8 9)' +check 5061 "sort with reverse cmp" '(5 4 3 1 1)' +check 5062 "sort 10 reversed -> length" '10' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 455ef84c..7811c7d3 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,15 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-09 Phase 6 — List.sort upgraded from O(n²) insertion sort + to O(n log n) mergesort (+3 tests, 528 total). split + merge are + inner functions of sort; tuple destructuring on the split result is + expressed via nested match (pattern parser needs explicit + paren-wrapping of tuple patterns inside match arms in some places — + inline let-tuple destructuring on a match RHS would be cleaner if + multi-binding `let (a, b) = ...` were promoted, but this works + today). Should make sort-using baselines noticeably faster on + larger lists; existing sort_uniq automatically benefits. 
- 2026-05-09 Phase 4 — integer `/` is now truncate-toward-zero on ints, IEEE on floats. Both operands integral → host floor/ceil based on sign; otherwise host `/`. Fixes `Int.rem` (which was returning 0