Files
rose-ash/lib/search/tests/index.sx
giles 0f0da0319c
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 53s
search: Phase 2 query AST + boolean/phrase eval + 28 tests
Query ADT (Term|And|Or|Not|Phrase) and evalQuery over docid-sorted posting
lists: boolean ops as linear merges, Not over the allDocs universe, Phrase via
positional adjacency. Batched both test suites into one program eval each
(search-batch) so they finish under heavy CPU load. 46/46.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-06 18:47:42 +00:00

89 lines
2.8 KiB
Plaintext

;; Phase 1 — tokenize + inverted index.
;; All cases run in ONE program evaluation (search-batch) to stay fast under load.
;; Scalar results (docFreq) are wrapped as singleton lists so the batch is a list
;; of lists.
;; Test table for tokenizing and the inverted index.
;; Every entry is a three-element list:
;;   0: label     — the test name reported for the case
;;   1: program   — source text evaluated in the shared batch run
;;   2: expected  — the value the matching batch result is compared against
;; docFreq programs are wrapped in [...] so their scalar result comes back as a
;; one-element list, keeping every expected value a list.
(define index-cases
  (list
    ;; ---- tokens / positioned ----
    (list
      "tokens basic lowercases"
      "tokens \"The Cat sat\""
      (list "the" "cat" "sat"))
    (list
      "tokens strips punctuation"
      "tokens \"Hello, World!\""
      (list "hello" "world"))
    ;; Leading, interior and trailing RUNS of spaces: with single spaces only,
    ;; this case would pass without any collapsing taking place.
    (list
      "tokens collapses whitespace"
      "tokens \"  a   b  \""
      (list "a" "b"))
    (list
      "tokens empty is empty"
      "tokens \"\""
      (list))
    (list
      "tokens keeps digits"
      "tokens \"abc123 x9\""
      (list "abc123" "x9"))
    (list
      "positioned attaches ordinals"
      "positioned \"a b a\""
      (list
        (list "a" 0)
        (list "b" 1)
        (list "a" 2)))

    ;; ---- indexDoc / lookupTerm ----
    ;; Expected postings are lists of (docid (position ...)).
    (list
      "index + lookup single doc"
      "lookupTerm \"cat\" (indexDoc 1 \"the cat sat\" emptyIndex)"
      (list (list 1 (list 1))))
    (list
      "lookup missing term is empty"
      "lookupTerm \"dog\" (indexDoc 1 \"the cat sat\" emptyIndex)"
      (list))
    (list
      "lookup records all positions"
      "lookupTerm \"the\" (indexDoc 1 \"the cat the dog the\" emptyIndex)"
      (list (list 1 (list 0 2 4))))
    ;; Doc 2 is indexed before doc 1, so a sorted result shows ordering is by
    ;; docid rather than by insertion.
    (list
      "multi-doc posting list sorted by docid"
      "lookupTerm \"x\" (indexDoc 1 \"x y\" (indexDoc 2 \"x z\" emptyIndex))"
      (list
        (list 1 (list 0))
        (list 2 (list 0))))
    (list
      "index/query case symmetry"
      "lookupTerm \"cat\" (indexDoc 1 \"CAT Cat cat\" emptyIndex)"
      (list (list 1 (list 0 1 2))))
    (list
      "re-index replaces a doc"
      "lookupTerm \"a\" (indexDoc 1 \"a a a\" (indexDoc 1 \"a\" emptyIndex))"
      (list (list 1 (list 0 1 2))))

    ;; ---- deleteDoc ----
    (list
      "delete removes a doc"
      "lookupTerm \"cat\" (deleteDoc 1 (indexDoc 1 \"the cat\" emptyIndex))"
      (list))
    (list
      "delete leaves other docs"
      "lookupTerm \"cat\" (deleteDoc 2 (indexDoc 2 \"big cat\" (indexDoc 1 \"the cat\" emptyIndex)))"
      (list (list 1 (list 1))))

    ;; ---- docFreq (scalar wrapped as a singleton list) ----
    (list
      "docFreq counts docs"
      "[docFreq \"cat\" (indexDoc 2 \"a cat\" (indexDoc 1 \"the cat\" emptyIndex))]"
      (list 2))
    (list
      "docFreq zero for missing"
      "[docFreq \"zzz\" (indexDoc 1 \"a b\" emptyIndex)]"
      (list 0))

    ;; ---- allTerms ----
    (list
      "allTerms sorted and unique"
      "allTerms (indexDoc 1 \"banana apple cherry apple\" emptyIndex)"
      (list "apple" "banana" "cherry"))
    (list
      "allTerms merged across docs"
      "allTerms (indexDoc 2 \"d a\" (indexDoc 1 \"c b\" emptyIndex))"
      (list "a" "b" "c" "d"))))
;; Results for every case, produced by a single search-batch call over the
;; program text (element 1) of each entry in index-cases. Results are later
;; read back by position, so they are expected to line up one-to-one with the
;; cases.
;; NOTE(review): the meaning of the "" first argument is not visible here —
;; confirm against search-batch's definition.
(define index-results
  (search-batch
    ""
    (map
      (fn (entry) (nth entry 1))
      index-cases)))
;; Check each case: pair the case at position idx with the batch result at the
;; same position and hand (label, actual, expected) to hk-test.
(map-indexed
  (fn (idx entry)
    (let ((label (nth entry 0))
          (actual (nth index-results idx))
          (expected (nth entry 2)))
      (hk-test label actual expected)))
  index-cases)

;; Final value of the file: the hk-test counters/failure record.
{:fail hk-test-fail
 :pass hk-test-pass
 :fails hk-test-fails}