Files
rose-ash/lib/search/tests/fuzzy.sx
giles 5945b51cfd
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 41s
search: fuzzy matching via edit distance + 18 tests
editDist as an O(m*n) row-based Levenshtein DP (naive recursion is exponential
and times out under load); fuzzyTerms/fuzzyDocs/fuzzyRankTfIdf expand a term to
indexed terms within a max edit distance. 166/166.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-06 21:47:56 +00:00

75 lines
2.1 KiB
Plaintext

;; Extension — fuzzy matching via Levenshtein edit distance.
;; Corpus: 1 "color flavor" 2 "colour kitten" 3 "colored"
;; allTerms: color colored colour flavor kitten
;;
;; fuzzy-setup is the setup source handed to search-batch as its first
;; argument. It binds `idx` to an index built by nesting three indexDoc
;; calls over emptyIndex (doc 1 innermost, doc 3 outermost); every
;; fuzzyTerms / fuzzyDocs / fuzzyRankTfIdf case below refers to that `idx`.
(define
fuzzy-setup
"idx = indexDoc 3 \"colored\" (indexDoc 2 \"colour kitten\" (indexDoc 1 \"color flavor\" emptyIndex))\n")
;; fuzzy-cases is the table of test cases. Each case is a three-element list:
;;   0: label        — passed to hk-test as the test name
;;   1: expression   — source string sent to search-batch for evaluation
;;   2: expected     — value the evaluated result is compared against
;; The editDist expressions are written inside [...] and their expected
;; values are one-element lists; the fuzzy* expressions are bare calls.
(define
fuzzy-cases
(list
;; --- editDist: edit distance between two strings ---
(list
"editDist substitution"
"[editDist \"kitten\" \"sitten\"]"
(list 1))
(list "editDist equal" "[editDist \"abc\" \"abc\"]" (list 0))
(list "editDist deletion" "[editDist \"abc\" \"ab\"]" (list 1))
(list "editDist insertion" "[editDist \"ab\" \"abc\"]" (list 1))
;; Empty-string boundaries: distance equals the other string's length.
(list "editDist from empty" "[editDist \"\" \"abc\"]" (list 3))
(list "editDist both empty" "[editDist \"\" \"\"]" (list 0))
;; kitten -> sitting: two substitutions plus one insertion.
(list
"editDist classic"
"[editDist \"kitten\" \"sitting\"]"
(list 3))
;; Distances between the corpus terms that the fuzzy* cases below rely on.
(list
"editDist color colour"
"[editDist \"color\" \"colour\"]"
(list 1))
(list
"editDist color colored"
"[editDist \"color\" \"colored\"]"
(list 2))
;; --- fuzzyTerms: indexed terms within a max edit distance of the query ---
;; The first argument appears to be the maximum distance (see case labels).
(list
"fuzzy terms dist 1"
"fuzzyTerms 1 \"color\" idx"
(list "color" "colour"))
(list
"fuzzy terms dist 2"
"fuzzyTerms 2 \"color\" idx"
(list "color" "colored" "colour"))
;; Distance 0 leaves only the exact term.
(list "fuzzy terms exact" "fuzzyTerms 0 \"color\" idx" (list "color"))
;; Query term "flavour" is not in the corpus; "flavor" is one edit away.
(list
"fuzzy terms other word"
"fuzzyTerms 1 \"flavour\" idx"
(list "flavor"))
;; --- fuzzyDocs: ids of documents matched through the expanded terms ---
(list
"fuzzy docs dist 1"
"fuzzyDocs 1 \"color\" idx"
(list 1 2))
(list
"fuzzy docs dist 2"
"fuzzyDocs 2 \"color\" idx"
(list 1 2 3))
;; No indexed term is within one edit of "zzzzz", so no documents match.
(list "fuzzy docs none" "fuzzyDocs 1 \"zzzzz\" idx" (list))
;; --- fuzzyRankTfIdf: ranked document ids for the expanded terms ---
;; NOTE(review): the expected order depends on the TF-IDF scoring, which is
;; not visible in this file — confirm against the ranking implementation.
(list
"fuzzy rank dist 1"
"fuzzyRankTfIdf 1 \"color\" idx"
(list 1 2))
(list
"fuzzy rank dist 2"
"fuzzyRankTfIdf 2 \"color\" idx"
(list 1 2 3))))
;; Evaluate every case expression in a single search-batch call.
;; The expression string (element 1 of each case) is extracted in table
;; order, so result i is looked up by the same index as case i below.
(define
  fuzzy-results
  (search-batch
    fuzzy-setup
    (map
      (fn (entry) (nth entry 1))
      fuzzy-cases)))
;; Report each case to hk-test: the label (element 0), the batch result at
;; the same position, and the expected value (element 2).
(map-indexed
  (fn (pos entry)
    (hk-test
      (nth entry 0)
      (nth fuzzy-results pos)
      (nth entry 2)))
  fuzzy-cases)

;; Final value of the file: the hk-test result bindings gathered into a dict.
;; NOTE(review): presumably pass/fail counters and a list of failures —
;; confirm against the hk-test definitions.
{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails}