search: prefix/wildcard queries + 14 tests
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 31s

prefixTerms matches indexed terms by prefix (allTerms + isPrefixOf); prefixDocs
unions their docs; prefixRankTfIdf ranks via the matched terms. 136/136.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-06 20:22:23 +00:00
parent 77ab827b91
commit 9d3b775b25
7 changed files with 97 additions and 7 deletions

View File

@@ -4,7 +4,7 @@
;; interpreter. Public Haskell entry points: indexDoc, lookupTerm, deleteDoc,
;; docFreq, allTerms, tokens, positioned, evalQuery, parseQuery, searchQuery,
;; rankTfIdf, rankBm25, topNTfIdf, topNBm25, fedIndex, aclFilter, searchTfIdfAcl,
;; topNTfIdfAcl, searchBm25Acl.
;; topNTfIdfAcl, searchBm25Acl, prefixTerms, prefixDocs, prefixRankTfIdf.
(define
search/src
@@ -19,4 +19,6 @@
"\n"
search/rank-src
"\n"
search/fed-src))
search/fed-src
"\n"
search/prefix-src))

View File

@@ -24,6 +24,7 @@ PRELOADS=(
lib/search/parse.sx
lib/search/rank.sx
lib/search/fed.sx
lib/search/prefix.sx
lib/search/api.sx
lib/search/testlib.sx
)
@@ -34,4 +35,5 @@ SUITES=(
"parse:lib/search/tests/parse.sx"
"rank:lib/search/tests/rank.sx"
"integration:lib/search/tests/integration.sx"
"prefix:lib/search/tests/prefix.sx"
)

10
lib/search/prefix.sx Normal file
View File

@@ -0,0 +1,10 @@
;; search prefix / wildcard queries — Haskell source fragment. Depends on index +
;; rank (reuses candStep / rankTfIdf). A prefix matches every indexed term that
;; starts with it; the matching terms are unioned (OR) into a docid set.
;; prefixTerms :: String -> Index -> [Term] (sorted, from allTerms)
;; prefixDocs :: String -> Index -> [DocId] (sorted union)
;; prefixRankTfIdf :: String -> Index -> [DocId] (ranked by the matched terms)
(define
search/prefix-src
"prefixTerms pre idx = filter (isPrefixOf pre) (allTerms idx)\nprefixDocs pre idx = foldl (candStep idx) [] (prefixTerms pre idx)\nprefixRankTfIdf pre idx = rankTfIdf (prefixTerms pre idx) idx\n")

View File

@@ -1,14 +1,15 @@
{
"lang": "search",
"total_passed": 122,
"total_passed": 136,
"total_failed": 0,
"total": 122,
"total": 136,
"suites": [
{"name":"index","passed":18,"failed":0,"total":18},
{"name":"boolean","passed":28,"failed":0,"total":28},
{"name":"parse","passed":32,"failed":0,"total":32},
{"name":"rank","passed":23,"failed":0,"total":23},
{"name":"integration","passed":21,"failed":0,"total":21}
{"name":"integration","passed":21,"failed":0,"total":21},
{"name":"prefix","passed":14,"failed":0,"total":14}
],
"generated": "2026-06-06T20:07:30+00:00"
"generated": "2026-06-06T20:21:41+00:00"
}

View File

@@ -1,6 +1,6 @@
# search scoreboard
**122 / 122 passing** (0 failure(s)).
**136 / 136 passing** (0 failure(s)).
| Suite | Passed | Total | Status |
|-------|--------|-------|--------|
@@ -9,3 +9,4 @@
| parse | 32 | 32 | ok |
| rank | 23 | 23 | ok |
| integration | 21 | 21 | ok |
| prefix | 14 | 14 | ok |

View File

@@ -0,0 +1,63 @@
;; Extension — prefix / wildcard queries.
;; Corpus: 1 "alpha alpine" 2 "beta apple" 3 "banana alpha"
;; allTerms sorted: alpha alpine apple banana beta
(define
prefix-setup
"idx = indexDoc 3 \"banana alpha\" (indexDoc 2 \"beta apple\" (indexDoc 1 \"alpha alpine\" emptyIndex))\n")
(define
prefix-cases
(list
(list
"prefix terms two matches"
"prefixTerms \"al\" idx"
(list "alpha" "alpine"))
(list
"prefix terms narrower"
"prefixTerms \"alp\" idx"
(list "alpha" "alpine"))
(list
"prefix terms wide"
"prefixTerms \"a\" idx"
(list "alpha" "alpine" "apple"))
(list "prefix terms single" "prefixTerms \"ban\" idx" (list "banana"))
(list "prefix terms exact term" "prefixTerms \"beta\" idx" (list "beta"))
(list "prefix terms none" "prefixTerms \"z\" idx" (list))
(list
"prefix docs union"
"prefixDocs \"al\" idx"
(list 1 3))
(list "prefix docs single term" "prefixDocs \"ban\" idx" (list 3))
(list
"prefix docs wide"
"prefixDocs \"a\" idx"
(list 1 2 3))
(list "prefix docs none" "prefixDocs \"z\" idx" (list))
(list
"prefix docs exact"
"prefixDocs \"alpha\" idx"
(list 1 3))
(list
"prefix rank ranks by matched terms"
"prefixRankTfIdf \"al\" idx"
(list 1 3))
(list
"prefix rank single doc"
"prefixRankTfIdf \"ban\" idx"
(list 3))
(list "prefix rank empty" "prefixRankTfIdf \"z\" idx" (list))))
(define
prefix-results
(search-batch
prefix-setup
(map (fn (c) (nth c 1)) prefix-cases)))
(map-indexed
(fn
(i c)
(hk-test (nth c 0) (nth prefix-results i) (nth c 2)))
prefix-cases)
{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails}

View File

@@ -108,8 +108,19 @@ lib/search/index.sx lib/search/eval.sx
- [x] `lib/search/tests/integration.sx` — 21 cases: index merge, cross-peer df/lookup,
position preservation, boolean/phrase over the merge, ACL filter + top-N + bm25
## Extensions (post-roadmap, search-shaped vocabulary)
- [x] prefix / wildcard queries (`prefixTerms`, `prefixDocs`, `prefixRankTfIdf`) — 14 tests
- [ ] fuzzy matching — edit distance term expansion
- [ ] result pagination (offset / limit)
- [ ] snippet / highlight generation
- [ ] stemming (suffix stripping) — recall-improving normalizer
## Progress log
- **Extension: prefix/wildcard queries (136/136 total).** `prefixTerms` matches every
indexed term starting with a prefix (via allTerms + isPrefixOf); `prefixDocs` unions
their docs; `prefixRankTfIdf` ranks treating the matched terms as the query. 14 tests.
- **Phase 4 complete — federation + ACL (122/122 total). Roadmap done.** `fedIndex`
merges per-peer inverted indices (union posting lists per term) after relabelling
local DocIds to global `gid = peer*1000 + local` — the bijection makes (peer,doc-id)