From 1dd350d59239af938413bd59c2fb35b26115002b Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 06:21:06 +0000 Subject: [PATCH] ocaml: phase 5.1 huffman.ml baseline (Huffman tree WPL = 224) Classic CLRS Huffman code example. ADT: type tree = Leaf of int * char | Node of int * tree * tree Build by repeatedly merging two lightest trees (sorted-list pq): let rec build_tree lst = match lst with | [t] -> t | a :: b :: rest -> let merged = Node (weight a + weight b, a, b) in build_tree (insert merged rest) weighted path length (= total Huffman bits): leaves {(5,a) (9,b) (12,c) (13,d) (16,e) (45,f)} -> 224 Tests sum-typed ADT with mixed arities, `function` keyword pattern matching, recursive sorted insert, depth-counting recursion. 150 baseline programs total. --- lib/ocaml/baseline/expected.json | 1 + lib/ocaml/baseline/huffman.ml | 38 ++++++++++++++++++++++++++++++++ plans/ocaml-on-sx.md | 9 ++++++++ 3 files changed, 48 insertions(+) create mode 100644 lib/ocaml/baseline/huffman.ml diff --git a/lib/ocaml/baseline/expected.json b/lib/ocaml/baseline/expected.json index 0bd5832e..6a9c8c82 100644 --- a/lib/ocaml/baseline/expected.json +++ b/lib/ocaml/baseline/expected.json @@ -65,6 +65,7 @@ "hamming.ml": 4, "hanoi.ml": 1023, "hist.ml": 75, + "huffman.ml": 224, "int_sqrt.ml": 1027, "is_prime.ml": 25, "fizz_classifier.ml": 540, diff --git a/lib/ocaml/baseline/huffman.ml b/lib/ocaml/baseline/huffman.ml new file mode 100644 index 00000000..774fb09a --- /dev/null +++ b/lib/ocaml/baseline/huffman.ml @@ -0,0 +1,38 @@ +type tree = Leaf of int * char | Node of int * tree * tree + +let weight = function + | Leaf (w, _) -> w + | Node (w, _, _) -> w + +let rec insert t lst = + match lst with + | [] -> [t] + | h :: rest -> + if weight t <= weight h then t :: lst + else h :: insert t rest + +let rec build_tree lst = + match lst with + | [] -> failwith "empty" + | [t] -> t + | a :: b :: rest -> + let merged = Node (weight a + weight b, a, b) in + build_tree (insert merged rest) + +let rec depth d t = + match t with + | Leaf (w, _) -> w * d + | Node (_, l, r) -> depth (d + 1) l + depth (d + 1) r + +;; + +let initial = [ + Leaf (5, 'a'); + Leaf (9, 'b'); + Leaf (12, 'c'); + Leaf (13, 'd'); + Leaf (16, 'e'); + Leaf (45, 'f') +] in +let t = build_tree initial in +depth 0 t diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 19410721..eb1d0725 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -407,6 +407,15 @@ _Newest first._ binary search tree (`type 'a tree = Leaf | Node of 'a * 'a tree * 'a tree`) with insert + in-order traversal. Tests parametric ADT, recursive match, List.append, List.fold_left. +- 2026-05-10 Phase 5.1 — huffman.ml baseline (Huffman tree weighted + path length on letters {(5,a) (9,b) (12,c) (13,d) (16,e) (45,f)} + = 224). Builds optimal prefix code by repeatedly merging the two + lightest trees: insert merged node back into a sorted-by-weight + list. Verifies the standard Huffman result of 224 bits for this + classic CLRS example. Tests sum-typed ADT with two arities + (Leaf of int * char | Node of int * tree * tree), `function` + keyword pattern matching, recursive sorted insert, depth-counting + recursion. 150 baseline programs total. - 2026-05-10 Phase 5.1 — parser: accept `if/match/let/fun/...` as the rhs of `<-` and `:=`. parse-binop-rhs now special-cases prec-1 ops (`<-`, `:=`) to call parse-expr-no-seq for their right operand