search: synonym / query expansion + 9 tests
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 19s
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 19s
A synonym map [(Term,[Term])] expands a query term to itself plus its synonyms (expandTerm); synDocs unions the docs matching any expanded term, and synRankTfIdf ranks using the expanded term list. 214/214 tests passing. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -7,7 +7,7 @@
|
|||||||
;; topNTfIdfAcl, searchBm25Acl, prefixTerms, prefixDocs, prefixRankTfIdf,
|
;; topNTfIdfAcl, searchBm25Acl, prefixTerms, prefixDocs, prefixRankTfIdf,
|
||||||
;; paginate, pageTfIdf, pageBm25, resultCount, editDist, fuzzyTerms, fuzzyDocs,
|
;; paginate, pageTfIdf, pageBm25, resultCount, editDist, fuzzyTerms, fuzzyDocs,
|
||||||
;; fuzzyRankTfIdf, highlight, snippet, stem, stemText, stemTokens, indexStemmed,
|
;; fuzzyRankTfIdf, highlight, snippet, stem, stemText, stemTokens, indexStemmed,
|
||||||
;; nearDocs.
|
;; nearDocs, expandTerm, synDocs, synRankTfIdf.
|
||||||
|
|
||||||
(define
|
(define
|
||||||
search/src
|
search/src
|
||||||
@@ -34,4 +34,6 @@
|
|||||||
"\n"
|
"\n"
|
||||||
search/stem-src
|
search/stem-src
|
||||||
"\n"
|
"\n"
|
||||||
search/near-src))
|
search/near-src
|
||||||
|
"\n"
|
||||||
|
search/syn-src))
|
||||||
|
|||||||
@@ -30,6 +30,7 @@ PRELOADS=(
|
|||||||
lib/search/highlight.sx
|
lib/search/highlight.sx
|
||||||
lib/search/stem.sx
|
lib/search/stem.sx
|
||||||
lib/search/near.sx
|
lib/search/near.sx
|
||||||
|
lib/search/syn.sx
|
||||||
lib/search/api.sx
|
lib/search/api.sx
|
||||||
lib/search/testlib.sx
|
lib/search/testlib.sx
|
||||||
)
|
)
|
||||||
@@ -46,4 +47,5 @@ SUITES=(
|
|||||||
"highlight:lib/search/tests/highlight.sx"
|
"highlight:lib/search/tests/highlight.sx"
|
||||||
"stem:lib/search/tests/stem.sx"
|
"stem:lib/search/tests/stem.sx"
|
||||||
"near:lib/search/tests/near.sx"
|
"near:lib/search/tests/near.sx"
|
||||||
|
"syn:lib/search/tests/syn.sx"
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -1,8 +1,8 @@
|
|||||||
{
|
{
|
||||||
"lang": "search",
|
"lang": "search",
|
||||||
"total_passed": 205,
|
"total_passed": 214,
|
||||||
"total_failed": 0,
|
"total_failed": 0,
|
||||||
"total": 205,
|
"total": 214,
|
||||||
"suites": [
|
"suites": [
|
||||||
{"name":"index","passed":18,"failed":0,"total":18},
|
{"name":"index","passed":18,"failed":0,"total":18},
|
||||||
{"name":"boolean","passed":28,"failed":0,"total":28},
|
{"name":"boolean","passed":28,"failed":0,"total":28},
|
||||||
@@ -14,7 +14,8 @@
|
|||||||
{"name":"fuzzy","passed":18,"failed":0,"total":18},
|
{"name":"fuzzy","passed":18,"failed":0,"total":18},
|
||||||
{"name":"highlight","passed":12,"failed":0,"total":12},
|
{"name":"highlight","passed":12,"failed":0,"total":12},
|
||||||
{"name":"stem","passed":18,"failed":0,"total":18},
|
{"name":"stem","passed":18,"failed":0,"total":18},
|
||||||
{"name":"near","passed":9,"failed":0,"total":9}
|
{"name":"near","passed":9,"failed":0,"total":9},
|
||||||
|
{"name":"syn","passed":9,"failed":0,"total":9}
|
||||||
],
|
],
|
||||||
"generated": "2026-06-06T23:01:07+00:00"
|
"generated": "2026-06-06T23:25:35+00:00"
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
# search scoreboard
|
# search scoreboard
|
||||||
|
|
||||||
**205 / 205 passing** (0 failure(s)).
|
**214 / 214 passing** (0 failure(s)).
|
||||||
|
|
||||||
| Suite | Passed | Total | Status |
|
| Suite | Passed | Total | Status |
|
||||||
|-------|--------|-------|--------|
|
|-------|--------|-------|--------|
|
||||||
@@ -15,3 +15,4 @@
|
|||||||
| highlight | 12 | 12 | ok |
|
| highlight | 12 | 12 | ok |
|
||||||
| stem | 18 | 18 | ok |
|
| stem | 18 | 18 | ok |
|
||||||
| near | 9 | 9 | ok |
|
| near | 9 | 9 | ok |
|
||||||
|
| syn | 9 | 9 | ok |
|
||||||
|
|||||||
10
lib/search/syn.sx
Normal file
10
lib/search/syn.sx
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
;; search synonym / query expansion — Haskell source fragment. Depends on index +
|
||||||
|
;; rank. A synonym map is an assoc list [(Term, [Term])]; a query term is expanded
|
||||||
|
;; to itself plus its synonyms, then the expanded set is unioned / ranked.
|
||||||
|
;; expandTerm :: [(Term,[Term])] -> Term -> [Term]
|
||||||
|
;; synDocs :: [(Term,[Term])] -> Term -> Index -> [DocId]
|
||||||
|
;; synRankTfIdf :: [(Term,[Term])] -> Term -> Index -> [DocId]
|
||||||
|
|
||||||
|
;; search/syn-src — Haskell source text for the synonym helpers, held as a single
;; string. Bindings defined in the string:
;;   synLookup    — the synonym list stored for t in synmap, or [] when t has no entry.
;;   expandTerm   — t followed by synLookup's result, so the original term is always first.
;;   synDocs      — left-folds (candStep idx) over the expanded terms, starting from [].
;;                  candStep is not defined in this fragment — TODO confirm which
;;                  fragment supplies it.
;;   synRankTfIdf — passes the expanded term list and idx to rankTfIdf.
;; NOTE(review): expandTerm does not de-duplicate, so a synmap entry that lists the
;; term among its own synonyms yields that term twice — confirm whether that matters
;; to rankTfIdf.
(define
|
||||||
|
search/syn-src
|
||||||
|
"synLookup synmap t = case lookup t synmap of { Nothing -> [] ; Just ss -> ss }\nexpandTerm synmap t = t : synLookup synmap t\nsynDocs synmap t idx = foldl (candStep idx) [] (expandTerm synmap t)\nsynRankTfIdf synmap t idx = rankTfIdf (expandTerm synmap t) idx\n")
|
||||||
53
lib/search/tests/syn.sx
Normal file
53
lib/search/tests/syn.sx
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
;; Extension — synonym / query expansion.
|
||||||
|
;; synmap: car -> automobile, vehicle ; big -> large
|
||||||
|
;; Corpus: 1 "fast car" 2 "shiny automobile" 3 "big truck" 4 "large house" 5 "vehicle review"
|
||||||
|
|
||||||
|
;; syn-setup — Haskell preamble shared by every case in this file. It binds:
;;   synmap — assoc list mapping "car" to ["automobile", "vehicle"] and "big" to ["large"].
;;   idx    — an index built by nesting indexDoc calls on emptyIndex; doc 1 is the
;;            innermost call and doc 5 the outermost.
(define
|
||||||
|
syn-setup
|
||||||
|
"synmap = [(\"car\", [\"automobile\", \"vehicle\"]), (\"big\", [\"large\"])]\nidx = indexDoc 5 \"vehicle review\" (indexDoc 4 \"large house\" (indexDoc 3 \"big truck\" (indexDoc 2 \"shiny automobile\" (indexDoc 1 \"fast car\" emptyIndex))))\n")
|
||||||
|
|
||||||
|
;; syn-cases — table of test cases, one three-element list per case:
;;   element 0: label handed to hk-test,
;;   element 1: Haskell expression string submitted through search-batch,
;;   element 2: value the result is checked against.
;; The first three cases cover expandTerm, the next four synDocs, and the last two
;; synRankTfIdf.
(define
|
||||||
|
syn-cases
|
||||||
|
(list
|
||||||
|
(list
|
||||||
|
"expand term with synonyms"
|
||||||
|
"expandTerm synmap \"car\""
|
||||||
|
(list "car" "automobile" "vehicle"))
|
||||||
|
(list
|
||||||
|
"expand single synonym"
|
||||||
|
"expandTerm synmap \"big\""
|
||||||
|
(list "big" "large"))
|
||||||
|
(list "expand unknown term" "expandTerm synmap \"banana\"" (list "banana"))
|
||||||
|
(list
|
||||||
|
"syn docs union"
|
||||||
|
"synDocs synmap \"car\" idx"
|
||||||
|
(list 1 2 5))
|
||||||
|
(list
|
||||||
|
"syn docs single synonym"
|
||||||
|
"synDocs synmap \"big\" idx"
|
||||||
|
(list 3 4))
|
||||||
|
(list
|
||||||
|
"syn docs no synonyms"
|
||||||
|
"synDocs synmap \"house\" idx"
|
||||||
|
(list 4))
|
||||||
|
(list "syn docs absent" "synDocs synmap \"plane\" idx" (list))
|
||||||
|
(list
|
||||||
|
"syn rank expanded"
|
||||||
|
"synRankTfIdf synmap \"car\" idx"
|
||||||
|
(list 1 2 5))
|
||||||
|
(list
|
||||||
|
"syn rank single"
|
||||||
|
"synRankTfIdf synmap \"big\" idx"
|
||||||
|
(list 3 4))))
|
||||||
|
|
||||||
|
;; syn-results — result of one search-batch call that receives syn-setup and the
;; list of expression strings (element 1 of every case).
;; NOTE(review): the checking code reads this by case index, so search-batch
;; presumably returns one result per expression in the same order — confirm
;; against search-batch's definition.
(define
|
||||||
|
syn-results
|
||||||
|
(search-batch syn-setup (map (fn (c) (nth c 1)) syn-cases)))
|
||||||
|
|
||||||
|
;; Walk the cases with their index and call hk-test once per case, passing the
;; case label, the batch result at the same index, and the case's third element
;; (the expected value).
(map-indexed
|
||||||
|
(fn
|
||||||
|
(i c)
|
||||||
|
(hk-test (nth c 0) (nth syn-results i) (nth c 2)))
|
||||||
|
syn-cases)
|
||||||
|
|
||||||
|
;; Final value of the file: a map exposing hk-test-fail, hk-test-pass and
;; hk-test-fails under :fail, :pass and :fails. Presumably these are the tallies
;; and failure records kept by hk-test — confirm in testlib.
{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails}
|
||||||
@@ -119,9 +119,13 @@ lib/search/index.sx lib/search/eval.sx
|
|||||||
- [x] stemming (suffix stripping) — `stem`, `stemText`, `stemTokens`, `indexStemmed`
|
- [x] stemming (suffix stripping) — `stem`, `stemText`, `stemTokens`, `indexStemmed`
|
||||||
— 18 tests
|
— 18 tests
|
||||||
- [x] proximity / NEAR — `nearDocs k t1 t2` (unordered, within k positions) — 9 tests
|
- [x] proximity / NEAR — `nearDocs k t1 t2` (unordered, within k positions) — 9 tests
|
||||||
|
- [x] synonym / query expansion — `expandTerm`, `synDocs`, `synRankTfIdf` — 9 tests
|
||||||
|
|
||||||
## Progress log
|
## Progress log
|
||||||
|
|
||||||
|
- **Extension: synonyms/query expansion (214/214 total).** A synonym map
|
||||||
|
`[(Term,[Term])]` expands a query term to itself + synonyms (`expandTerm`); `synDocs`
|
||||||
|
unions the docs matching any expanded term, and `synRankTfIdf` ranks using the expanded term list. 9 tests.
|
||||||
- **Extension: proximity/NEAR (205/205 total).** `nearDocs k t1 t2 idx` returns docs
|
- **Extension: proximity/NEAR (205/205 total).** `nearDocs k t1 t2 idx` returns docs
|
||||||
where both terms occur within k positions (unordered), candidates = posting
|
where both terms occur within k positions (unordered), candidates = posting
|
||||||
intersection, filtered on the positional postings. 9 tests.
|
intersection, filtered on the positional postings. 9 tests.
|
||||||
|
|||||||
Reference in New Issue
Block a user