search: fuzzy matching via edit distance + 18 tests
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 41s
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 41s
editDist as an O(m*n) row-based Levenshtein DP (naive recursion is exponential and times out under load); fuzzyTerms/fuzzyDocs/fuzzyRankTfIdf expand a term to indexed terms within a max edit distance. 166/166. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
74
lib/search/tests/fuzzy.sx
Normal file
74
lib/search/tests/fuzzy.sx
Normal file
@@ -0,0 +1,74 @@
|
||||
;; Extension — fuzzy matching via Levenshtein edit distance.
|
||||
;; Corpus: 1 "color flavor" 2 "colour kitten" 3 "colored"
|
||||
;; allTerms: color colored colour flavor kitten
|
||||
|
||||
(define
|
||||
fuzzy-setup
|
||||
"idx = indexDoc 3 \"colored\" (indexDoc 2 \"colour kitten\" (indexDoc 1 \"color flavor\" emptyIndex))\n")
|
||||
|
||||
(define
|
||||
fuzzy-cases
|
||||
(list
|
||||
(list
|
||||
"editDist substitution"
|
||||
"[editDist \"kitten\" \"sitten\"]"
|
||||
(list 1))
|
||||
(list "editDist equal" "[editDist \"abc\" \"abc\"]" (list 0))
|
||||
(list "editDist deletion" "[editDist \"abc\" \"ab\"]" (list 1))
|
||||
(list "editDist insertion" "[editDist \"ab\" \"abc\"]" (list 1))
|
||||
(list "editDist from empty" "[editDist \"\" \"abc\"]" (list 3))
|
||||
(list "editDist both empty" "[editDist \"\" \"\"]" (list 0))
|
||||
(list
|
||||
"editDist classic"
|
||||
"[editDist \"kitten\" \"sitting\"]"
|
||||
(list 3))
|
||||
(list
|
||||
"editDist color colour"
|
||||
"[editDist \"color\" \"colour\"]"
|
||||
(list 1))
|
||||
(list
|
||||
"editDist color colored"
|
||||
"[editDist \"color\" \"colored\"]"
|
||||
(list 2))
|
||||
(list
|
||||
"fuzzy terms dist 1"
|
||||
"fuzzyTerms 1 \"color\" idx"
|
||||
(list "color" "colour"))
|
||||
(list
|
||||
"fuzzy terms dist 2"
|
||||
"fuzzyTerms 2 \"color\" idx"
|
||||
(list "color" "colored" "colour"))
|
||||
(list "fuzzy terms exact" "fuzzyTerms 0 \"color\" idx" (list "color"))
|
||||
(list
|
||||
"fuzzy terms other word"
|
||||
"fuzzyTerms 1 \"flavour\" idx"
|
||||
(list "flavor"))
|
||||
(list
|
||||
"fuzzy docs dist 1"
|
||||
"fuzzyDocs 1 \"color\" idx"
|
||||
(list 1 2))
|
||||
(list
|
||||
"fuzzy docs dist 2"
|
||||
"fuzzyDocs 2 \"color\" idx"
|
||||
(list 1 2 3))
|
||||
(list "fuzzy docs none" "fuzzyDocs 1 \"zzzzz\" idx" (list))
|
||||
(list
|
||||
"fuzzy rank dist 1"
|
||||
"fuzzyRankTfIdf 1 \"color\" idx"
|
||||
(list 1 2))
|
||||
(list
|
||||
"fuzzy rank dist 2"
|
||||
"fuzzyRankTfIdf 2 \"color\" idx"
|
||||
(list 1 2 3))))
|
||||
|
||||
(define
|
||||
fuzzy-results
|
||||
(search-batch fuzzy-setup (map (fn (c) (nth c 1)) fuzzy-cases)))
|
||||
|
||||
(map-indexed
|
||||
(fn
|
||||
(i c)
|
||||
(hk-test (nth c 0) (nth fuzzy-results i) (nth c 2)))
|
||||
fuzzy-cases)
|
||||
|
||||
{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails}
|
||||
Reference in New Issue
Block a user