;; Phase 1 — tokenize + inverted index.
;; All cases run in ONE program evaluation (search-batch) to stay fast under load.
;; Scalar results (docFreq) are wrapped as singleton lists so the batch is a list
;; of lists.

;; Each case is a 3-element list:
;;   0: test name      (passed to hk-test as its first argument)
;;   1: expression src (collected and handed to search-batch)
;;   2: expected value (passed to hk-test as its third argument)
(define
  index-cases
  (list
    ;; --- tokens ---
    (list
      "tokens basic lowercases"
      "tokens \"The Cat sat\""
      (list "the" "cat" "sat"))
    (list
      "tokens strips punctuation"
      "tokens \"Hello, World!\""
      (list "hello" "world"))
    ;; NOTE(review): the input below only has single spaces (leading, inner,
    ;; trailing); consider runs of spaces if collapsing is meant to be
    ;; exercised — confirm against the intended fixture.
    (list
      "tokens collapses whitespace"
      "tokens \" a b \""
      (list "a" "b"))
    (list
      "tokens empty is empty"
      "tokens \"\""
      (list))
    (list
      "tokens keeps digits"
      "tokens \"abc123 x9\""
      (list "abc123" "x9"))
    ;; --- positioned ---
    (list
      "positioned attaches ordinals"
      "positioned \"a b a\""
      (list (list "a" 0) (list "b" 1) (list "a" 2)))
    ;; --- indexDoc / lookupTerm ---
    ;; Expected postings are written as (docid (positions...)) pairs.
    (list
      "index + lookup single doc"
      "lookupTerm \"cat\" (indexDoc 1 \"the cat sat\" emptyIndex)"
      (list (list 1 (list 1))))
    (list
      "lookup missing term is empty"
      "lookupTerm \"dog\" (indexDoc 1 \"the cat sat\" emptyIndex)"
      (list))
    (list
      "lookup records all positions"
      "lookupTerm \"the\" (indexDoc 1 \"the cat the dog the\" emptyIndex)"
      (list (list 1 (list 0 2 4))))
    ;; Doc 2 is indexed before doc 1; the expectation lists doc 1 first.
    (list
      "multi-doc posting list sorted by docid"
      "lookupTerm \"x\" (indexDoc 1 \"x y\" (indexDoc 2 \"x z\" emptyIndex))"
      (list (list 1 (list 0)) (list 2 (list 0))))
    (list
      "index/query case symmetry"
      "lookupTerm \"cat\" (indexDoc 1 \"CAT Cat cat\" emptyIndex)"
      (list (list 1 (list 0 1 2))))
    (list
      "re-index replaces a doc"
      "lookupTerm \"a\" (indexDoc 1 \"a a a\" (indexDoc 1 \"a\" emptyIndex))"
      (list (list 1 (list 0 1 2))))
    ;; --- deleteDoc ---
    (list
      "delete removes a doc"
      "lookupTerm \"cat\" (deleteDoc 1 (indexDoc 1 \"the cat\" emptyIndex))"
      (list))
    (list
      "delete leaves other docs"
      "lookupTerm \"cat\" (deleteDoc 2 (indexDoc 2 \"big cat\" (indexDoc 1 \"the cat\" emptyIndex)))"
      (list (list 1 (list 1))))
    ;; --- docFreq ---
    ;; The expression is bracketed so the scalar comes back as a one-element
    ;; list, matching the singleton-list expectation.
    (list
      "docFreq counts docs"
      "[docFreq \"cat\" (indexDoc 2 \"a cat\" (indexDoc 1 \"the cat\" emptyIndex))]"
      (list 2))
    (list
      "docFreq zero for missing"
      "[docFreq \"zzz\" (indexDoc 1 \"a b\" emptyIndex)]"
      (list 0))
    ;; --- allTerms ---
    ;; The test name is kept on a single line: a line break inside the string
    ;; literal would embed a newline in the reported name.
    (list
      "allTerms sorted and unique"
      "allTerms (indexDoc 1 \"banana apple cherry apple\" emptyIndex)"
      (list "apple" "banana" "cherry"))
    (list
      "allTerms merged across docs"
      "allTerms (indexDoc 2 \"d a\" (indexDoc 1 \"c b\" emptyIndex))"
      (list "a" "b" "c" "d"))))

;; Evaluate every case expression in a single search-batch call; results are
;; read back by position, so result i belongs to case i.
;; NOTE(review): the first argument is the empty string — presumably extra
;; source/prelude text for the batch; confirm against search-batch.
(define
  index-results
  (search-batch "" (map (fn (c) (nth c 1)) index-cases)))

;; Report each case to hk-test as (name, actual, expected).
(map-indexed
  (fn (i c) (hk-test (nth c 0) (nth index-results i) (nth c 2)))
  index-cases)

;; Final value of the file: the hk-test counters.
;; NOTE(review): presumably fail/pass counts and the list of failures —
;; confirm against the hk-test harness.
{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails}