search: synonym / query expansion + 9 tests
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 19s

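A synonym map [(Term,[Term])] drives query expansion: expandTerm expands a query
term to itself plus its synonyms, synDocs returns the union of documents matching
any expanded term, and synRankTfIdf ranks documents by TF-IDF over the expanded
set. Test total: 214/214 passing.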

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-06 23:27:03 +00:00
parent cf4e613e43
commit cfa68c3db3
7 changed files with 80 additions and 7 deletions

View File

@@ -7,7 +7,7 @@
;; topNTfIdfAcl, searchBm25Acl, prefixTerms, prefixDocs, prefixRankTfIdf, ;; topNTfIdfAcl, searchBm25Acl, prefixTerms, prefixDocs, prefixRankTfIdf,
;; paginate, pageTfIdf, pageBm25, resultCount, editDist, fuzzyTerms, fuzzyDocs, ;; paginate, pageTfIdf, pageBm25, resultCount, editDist, fuzzyTerms, fuzzyDocs,
;; fuzzyRankTfIdf, highlight, snippet, stem, stemText, stemTokens, indexStemmed, ;; fuzzyRankTfIdf, highlight, snippet, stem, stemText, stemTokens, indexStemmed,
;; nearDocs. ;; nearDocs, expandTerm, synDocs, synRankTfIdf.
(define (define
search/src search/src
@@ -34,4 +34,6 @@
"\n" "\n"
search/stem-src search/stem-src
"\n" "\n"
search/near-src)) search/near-src
"\n"
search/syn-src))

View File

@@ -30,6 +30,7 @@ PRELOADS=(
lib/search/highlight.sx lib/search/highlight.sx
lib/search/stem.sx lib/search/stem.sx
lib/search/near.sx lib/search/near.sx
lib/search/syn.sx
lib/search/api.sx lib/search/api.sx
lib/search/testlib.sx lib/search/testlib.sx
) )
@@ -46,4 +47,5 @@ SUITES=(
"highlight:lib/search/tests/highlight.sx" "highlight:lib/search/tests/highlight.sx"
"stem:lib/search/tests/stem.sx" "stem:lib/search/tests/stem.sx"
"near:lib/search/tests/near.sx" "near:lib/search/tests/near.sx"
"syn:lib/search/tests/syn.sx"
) )

View File

@@ -1,8 +1,8 @@
{ {
"lang": "search", "lang": "search",
"total_passed": 205, "total_passed": 214,
"total_failed": 0, "total_failed": 0,
"total": 205, "total": 214,
"suites": [ "suites": [
{"name":"index","passed":18,"failed":0,"total":18}, {"name":"index","passed":18,"failed":0,"total":18},
{"name":"boolean","passed":28,"failed":0,"total":28}, {"name":"boolean","passed":28,"failed":0,"total":28},
@@ -14,7 +14,8 @@
{"name":"fuzzy","passed":18,"failed":0,"total":18}, {"name":"fuzzy","passed":18,"failed":0,"total":18},
{"name":"highlight","passed":12,"failed":0,"total":12}, {"name":"highlight","passed":12,"failed":0,"total":12},
{"name":"stem","passed":18,"failed":0,"total":18}, {"name":"stem","passed":18,"failed":0,"total":18},
{"name":"near","passed":9,"failed":0,"total":9} {"name":"near","passed":9,"failed":0,"total":9},
{"name":"syn","passed":9,"failed":0,"total":9}
], ],
"generated": "2026-06-06T23:01:07+00:00" "generated": "2026-06-06T23:25:35+00:00"
} }

View File

@@ -1,6 +1,6 @@
# search scoreboard # search scoreboard
**205 / 205 passing** (0 failure(s)). **214 / 214 passing** (0 failure(s)).
| Suite | Passed | Total | Status | | Suite | Passed | Total | Status |
|-------|--------|-------|--------| |-------|--------|-------|--------|
@@ -15,3 +15,4 @@
| highlight | 12 | 12 | ok | | highlight | 12 | 12 | ok |
| stem | 18 | 18 | ok | | stem | 18 | 18 | ok |
| near | 9 | 9 | ok | | near | 9 | 9 | ok |
| syn | 9 | 9 | ok |

10
lib/search/syn.sx Normal file
View File

@@ -0,0 +1,10 @@
;; search synonym / query expansion — Haskell source fragment. Depends on index +
;; rank. A synonym map is an assoc list [(Term, [Term])]; a query term is expanded
;; to itself plus its synonyms, then the expanded set is unioned / ranked.
;; NOTE(review): the fragment references candStep and rankTfIdf, neither of which
;; is defined here — presumably supplied by earlier search fragments; confirm
;; which fragment provides candStep.
;; synLookup :: [(Term,[Term])] -> Term -> [Term]   (helper; [] when t has no entry)
;; expandTerm :: [(Term,[Term])] -> Term -> [Term]
;; synDocs :: [(Term,[Term])] -> Term -> Index -> [DocId]
;; synRankTfIdf :: [(Term,[Term])] -> Term -> Index -> [DocId]
;; NOTE(review): expandTerm does not de-duplicate — if a synonym list contains the
;; query term itself, that term appears twice in the list handed to candStep /
;; rankTfIdf. Whether that skews ranking depends on rankTfIdf; verify.
(define
search/syn-src
"synLookup synmap t = case lookup t synmap of { Nothing -> [] ; Just ss -> ss }\nexpandTerm synmap t = t : synLookup synmap t\nsynDocs synmap t idx = foldl (candStep idx) [] (expandTerm synmap t)\nsynRankTfIdf synmap t idx = rankTfIdf (expandTerm synmap t) idx\n")

53
lib/search/tests/syn.sx Normal file
View File

@@ -0,0 +1,53 @@
;; Extension — synonym / query expansion.
;; syn-setup is a Haskell source string defining the two fixtures the cases use
;; (synmap and idx); it is passed as the first argument to search-batch.
;; synmap: car -> automobile, vehicle ; big -> large
;; Corpus: 1 "fast car" 2 "shiny automobile" 3 "big truck" 4 "large house" 5 "vehicle review"
;; The indexDoc calls are nested, so doc 1 is indexed first (innermost) and doc 5 last.
(define
syn-setup
"synmap = [(\"car\", [\"automobile\", \"vehicle\"]), (\"big\", [\"large\"])]\nidx = indexDoc 5 \"vehicle review\" (indexDoc 4 \"large house\" (indexDoc 3 \"big truck\" (indexDoc 2 \"shiny automobile\" (indexDoc 1 \"fast car\" emptyIndex))))\n")
;; Test table. Each entry is a 3-element list:
;;   0: test label
;;   1: Haskell expression (string) to evaluate against the setup fixtures
;;   2: expected result
;; Three cases cover expandTerm, four cover synDocs, two cover synRankTfIdf.
;; Expected lists are compared in the order written (term first, then synonyms
;; in synmap order for expandTerm).
(define
syn-cases
(list
(list
"expand term with synonyms"
"expandTerm synmap \"car\""
(list "car" "automobile" "vehicle"))
(list
"expand single synonym"
"expandTerm synmap \"big\""
(list "big" "large"))
(list "expand unknown term" "expandTerm synmap \"banana\"" (list "banana"))
(list
"syn docs union"
"synDocs synmap \"car\" idx"
(list 1 2 5))
(list
"syn docs single synonym"
"synDocs synmap \"big\" idx"
(list 3 4))
(list
"syn docs no synonyms"
"synDocs synmap \"house\" idx"
(list 4))
(list "syn docs absent" "synDocs synmap \"plane\" idx" (list))
(list
"syn rank expanded"
"synRankTfIdf synmap \"car\" idx"
(list 1 2 5))
(list
"syn rank single"
"synRankTfIdf synmap \"big\" idx"
(list 3 4))))
;; Evaluate every case expression (element 1 of each case) in one search-batch
;; call, with syn-setup as the first argument. The results are later read by
;; index, so they are expected to come back in the same order as syn-cases.
(define
syn-results
(search-batch syn-setup (map (fn (c) (nth c 1)) syn-cases)))
;; Check each case: hk-test receives the label (element 0), the actual result at
;; the same index in syn-results, and the expected value (element 2).
(map-indexed
(fn
(i c)
(hk-test (nth c 0) (nth syn-results i) (nth c 2)))
syn-cases)
;; Final expression of the file: a map exposing the hk-test-fail, hk-test-pass and
;; hk-test-fails values (presumably fail count, pass count and failure details
;; maintained by hk-test — confirm against testlib).
{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails}

View File

@@ -119,9 +119,13 @@ lib/search/index.sx lib/search/eval.sx
- [x] stemming (suffix stripping) — `stem`, `stemText`, `stemTokens`, `indexStemmed` - [x] stemming (suffix stripping) — `stem`, `stemText`, `stemTokens`, `indexStemmed`
— 18 tests — 18 tests
- [x] proximity / NEAR — `nearDocs k t1 t2` (unordered, within k positions) — 9 tests - [x] proximity / NEAR — `nearDocs k t1 t2` (unordered, within k positions) — 9 tests
- [x] synonym / query expansion — `expandTerm`, `synDocs`, `synRankTfIdf` — 9 tests
## Progress log ## Progress log
- **Extension: synonyms/query expansion (214/214 total).** A synonym map
`[(Term,[Term])]` expands a query term to itself plus its synonyms (`expandTerm`);
`synDocs` returns the union of documents matching any expanded term, and
`synRankTfIdf` ranks documents over the expanded set. 9 tests.
- **Extension: proximity/NEAR (205/205 total).** `nearDocs k t1 t2 idx` returns docs - **Extension: proximity/NEAR (205/205 total).** `nearDocs k t1 t2 idx` returns docs
where both terms occur within k positions (unordered), candidates = posting where both terms occur within k positions (unordered), candidates = posting
intersection, filtered on the positional postings. 9 tests. intersection, filtered on the positional postings. 9 tests.