;; Phase 3 — ranking (TF-IDF, BM25, top-N). Deterministic: ties broken by DocId.
;; Corpora:
;;   idx1: 1 "alpha alpha alpha gamma"   2 "alpha"   3 "beta"
;;   idx2: 1 "cat"   2 "cat cat dog elephant frog grape"   3 "zzz"
;;   idx3: 1 "kite"   2 "kite"   (identical docs -> tiebreak)

;; Source text prepended to every test expression. It binds idx1, idx2 and
;; idx3 by folding indexDoc over emptyIndex, one call per document listed in
;; the corpora table above. Kept as a single literal so the embedded "\n"
;; separators and escaped quotes stay exactly as the evaluator receives them.
(define
  rank-setup
  "idx1 = indexDoc 3 \"beta\" (indexDoc 2 \"alpha\" (indexDoc 1 \"alpha alpha alpha gamma\" emptyIndex))\nidx2 = indexDoc 3 \"zzz\" (indexDoc 2 \"cat cat dog elephant frog grape\" (indexDoc 1 \"cat\" emptyIndex))\nidx3 = indexDoc 2 \"kite\" (indexDoc 1 \"kite\" emptyIndex)\n")

;; Test table. Each entry is (label expression expected):
;;   0: label      — human-readable test name passed to hk-test
;;   1: expression — source text evaluated together with rank-setup
;;   2: expected   — expected result as a list of integers (ranked DocIds, or a
;;                   one-element list for the scalar accessors at the end)
(define
  rank-cases
  (list
    ;; --- TF-IDF ranking ---
    (list "tfidf tf ordering" "rankTfIdf [\"alpha\"] idx1" (list 1 2))
    (list "tfidf rare term boosts" "rankTfIdf [\"alpha\", \"beta\"] idx1" (list 1 3 2))
    (list "tfidf single-doc term" "rankTfIdf [\"gamma\"] idx1" (list 1))
    (list "tfidf absent term empty" "rankTfIdf [\"nope\"] idx1" (list))
    (list "tfidf empty query empty" "rankTfIdf [] idx1" (list))
    ;; Docs 1 and 3 are expected to tie here; the lower DocId comes first.
    (list "tfidf candidate union tie by docid" "rankTfIdf [\"beta\", \"gamma\"] idx1" (list 1 3))
    (list "tfidf tf ordering idx2" "rankTfIdf [\"cat\"] idx2" (list 2 1))
    ;; --- top-N over TF-IDF ---
    (list "topN tfidf 1" "topNTfIdf 1 [\"alpha\"] idx1" (list 1))
    (list "topN tfidf 2" "topNTfIdf 2 [\"alpha\", \"beta\"] idx1" (list 1 3))
    (list "topN exceeds results" "topNTfIdf 10 [\"gamma\"] idx1" (list 1))
    (list "topN zero" "topNTfIdf 0 [\"alpha\"] idx1" (list))
    ;; --- BM25 ranking (arguments: k1, b, query, index) ---
    ;; With b = 0.75 the expected order on idx2 is the reverse of the TF-IDF
    ;; case above; with b = 0.0 it matches the TF-IDF order again.
    (list "bm25 tf+length flips tfidf" "rankBm25 1.5 0.75 [\"cat\"] idx2" (list 1 2))
    (list "bm25 b=0 ignores length" "rankBm25 1.5 0.0 [\"cat\"] idx2" (list 2 1))
    (list "bm25 alpha idx1" "rankBm25 1.5 0.75 [\"alpha\"] idx1" (list 1 2))
    (list "bm25 absent empty" "rankBm25 1.5 0.75 [\"nope\"] idx1" (list))
    (list "bm25 single-doc term" "rankBm25 1.5 0.75 [\"gamma\"] idx1" (list 1))
    (list "bm25 topN 1" "topNBm25 1 1.5 0.75 [\"cat\"] idx2" (list 1))
    ;; Result is sorted before comparison, so only the candidate set is checked.
    (list "bm25 same candidate set" "sort (rankBm25 1.5 0.75 [\"alpha\", \"beta\"] idx1)" (list 1 2 3))
    ;; --- deterministic tie-breaking on identical documents ---
    (list "tfidf stable tiebreak" "rankTfIdf [\"kite\"] idx3" (list 1 2))
    (list "bm25 stable tiebreak" "rankBm25 1.5 0.75 [\"kite\"] idx3" (list 1 2))
    ;; --- index accessors; scalars are wrapped in a list literal so every
    ;; expected value has the same list shape ---
    (list "numDocs" "[numDocs idx1]" (list 3))
    (list "docLen counts tokens" "[docLen 1 idx1]" (list 4))
    (list "docFreq via index" "[docFreq \"alpha\" idx1]" (list 2))))

;; Results for all cases, obtained in one search-batch call that receives the
;; shared setup text and the list of expression strings (field 1 of each case).
;; NOTE(review): search-batch is defined elsewhere; presumably it returns one
;; result per expression in the same order — the indexing below relies on that.
(define
  rank-results
  (search-batch rank-setup (map (fn (c) (nth c 1)) rank-cases)))

;; Compare each case's result (looked up by position) with its expected value.
;; hk-test receives the label, the actual result and the expected list.
(map-indexed
  (fn (i c) (hk-test (nth c 0) (nth rank-results i) (nth c 2)))
  rank-cases)

;; Summary map exposing the hk-test-fail, hk-test-pass and hk-test-fails values
;; as the value of the script's final expression.
{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails}