search: Phase 2 query parser + 32 tests
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 46s
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 46s
Query tokenizer + recursive-descent parser: OR<AND<NOT precedence, implicit AND on adjacency, quoted phrases, parens, case-insensitive keywords. parseQuery, searchQuery, showQ. Worked around haskell-on-sx parser limits (ord-based delimiters; multi-clause fns instead of []-pattern case alts). 78/78. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
139
lib/search/tests/parse.sx
Normal file
139
lib/search/tests/parse.sx
Normal file
@@ -0,0 +1,139 @@
|
||||
;; Phase 2 — query parser (parseQuery / searchQuery).
|
||||
;; AST cases assert showQ (parseQuery s); search cases assert searchQuery s idx
|
||||
;; against the standard corpus. Each group runs in one batched program eval.
|
||||
;; doc 1 "the quick brown dog" doc 2 "a quick brown fox" doc 3 "the dog barks loudly"
|
||||
|
||||
;; Haskell source line that binds `idx` to an index built from the three
;; corpus documents. Doc 1 is indexed innermost (first), doc 3 outermost.
;; It is passed to search-batch as the first argument for the search cases,
;; whose expressions all reference `idx`.
(define parse-corpus
  "idx = indexDoc 3 \"the dog barks loudly\" (indexDoc 2 \"a quick brown fox\" (indexDoc 1 \"the quick brown dog\" emptyIndex))\n")
|
||||
|
||||
;; Parser shape expectations.
;; Each entry is (label haskell-expression expected-showQ-text).
;; Rendering seen in the expectations: T:<term>, P:<w1-w2-...> for a quoted
;; phrase, (a & b) for AND, (a | b) for OR, !a for NOT.
(define ast-cases
  (list
    ;; --- single terms ---
    (list "single term" "showQ (parseQuery \"cat\")" "T:cat")
    (list "term normalized" "showQ (parseQuery \"CAT\")" "T:cat")
    ;; --- binary / unary operators, keywords in either case ---
    (list "explicit and" "showQ (parseQuery \"cat AND dog\")" "(T:cat & T:dog)")
    (list "lowercase and keyword" "showQ (parseQuery \"cat and dog\")" "(T:cat & T:dog)")
    (list "implicit and" "showQ (parseQuery \"cat dog\")" "(T:cat & T:dog)")
    (list "or" "showQ (parseQuery \"cat OR dog\")" "(T:cat | T:dog)")
    (list "not" "showQ (parseQuery \"NOT cat\")" "!T:cat")
    ;; --- precedence: NOT binds tightest, then AND, then OR ---
    (list "and binds tighter than or" "showQ (parseQuery \"cat AND dog OR bird\")" "((T:cat & T:dog) | T:bird)")
    (list "or then and" "showQ (parseQuery \"cat OR dog AND bird\")" "(T:cat | (T:dog & T:bird))")
    (list "parens override precedence" "showQ (parseQuery \"(cat OR dog) AND bird\")" "((T:cat | T:dog) & T:bird)")
    (list "and with not" "showQ (parseQuery \"cat AND NOT dog\")" "(T:cat & !T:dog)")
    ;; --- quoted phrases (the query itself contains escaped double quotes) ---
    (list "two-word phrase" "showQ (parseQuery \"\\\"quick brown\\\"\")" "P:quick-brown")
    (list "three-word phrase" "showQ (parseQuery \"\\\"quick brown fox\\\"\")" "P:quick-brown-fox")
    ;; --- associativity ---
    (list "and left-assoc" "showQ (parseQuery \"a AND b AND c\")" "((T:a & T:b) & T:c)")
    (list "or left-assoc" "showQ (parseQuery \"a OR b OR c\")" "((T:a | T:b) | T:c)")
    ;; --- tokenizer drops punctuation ---
    (list "punctuation stripped" "showQ (parseQuery \"cat, dog!\")" "(T:cat & T:dog)")))
|
||||
|
||||
;; End-to-end search expectations against the three-document corpus bound
;; to `idx`. Each entry is (label haskell-expression expected-doc-id-list).
;; Term postings implied by the corpus text, for reference:
;;   the -> 1 3   quick -> 1 2   brown -> 1 2   dog -> 1 3
;;   fox -> 2     barks -> 3
(define search-cases
  (list
    ;; --- single terms ---
    (list "term" "searchQuery \"quick\" idx" (list 1 2))
    (list "term normalized" "searchQuery \"QUICK\" idx" (list 1 2))
    ;; --- boolean operators ---
    (list "explicit and" "searchQuery \"quick AND brown\" idx" (list 1 2))
    (list "implicit and" "searchQuery \"quick brown\" idx" (list 1 2))
    (list "and disjoint" "searchQuery \"the AND fox\" idx" (list))
    (list "or" "searchQuery \"fox OR barks\" idx" (list 2 3))
    (list "not" "searchQuery \"NOT the\" idx" (list 2))
    (list "and not" "searchQuery \"quick AND NOT the\" idx" (list 2))
    ;; --- precedence: both expect AND grouped first; the other grouping gives (1 3) ---
    (list "precedence and-or" "searchQuery \"the AND dog OR fox\" idx" (list 1 2 3))
    (list "precedence or-and" "searchQuery \"fox OR the AND dog\" idx" (list 1 2 3))
    (list "parens" "searchQuery \"the AND (dog OR fox)\" idx" (list 1 3))
    ;; --- quoted phrases ---
    (list "phrase" "searchQuery \"\\\"quick brown\\\"\" idx" (list 1 2))
    (list "phrase one doc" "searchQuery \"\\\"brown dog\\\"\" idx" (list 1))
    (list "phrase and term" "searchQuery \"\\\"quick brown\\\" AND dog\" idx" (list 1))
    (list "not phrase" "searchQuery \"NOT \\\"quick brown\\\"\" idx" (list 3))
    ;; --- adjacency without quotes is AND, not a phrase ---
    (list "implicit and terms" "searchQuery \"dog barks\" idx" (list 3))))
|
||||
|
||||
;; Evaluate every AST-case expression (element 1 of each entry) in a single
;; search-batch call. The preamble is empty because these expressions do not
;; reference `idx`.
;; NOTE(review): search-batch is defined outside this file; presumably it
;; returns one result per expression, in order -- confirm.
(define ast-results
  (search-batch
    ""
    (map (fn (entry) (nth entry 1)) ast-cases)))
|
||||
;; Evaluate every search-case expression (element 1 of each entry) in a
;; single search-batch call, with parse-corpus as the preamble so that `idx`
;; is bound for all of them.
;; NOTE(review): search-batch is defined outside this file; presumably it
;; returns one result per expression, in order -- confirm.
(define search-results
  (search-batch
    parse-corpus
    (map (fn (entry) (nth entry 1)) search-cases)))
|
||||
|
||||
;; Register one hk-test per AST case. The label is prefixed with "ast: ",
;; followed by the batch result at the same position and then the case's
;; third element (the expected showQ text).
;; NOTE(review): hk-test is defined elsewhere; argument order looks like
;; (name actual expected) -- confirm.
(map-indexed
  (fn (pos entry)
    (hk-test
      (str "ast: " (nth entry 0))
      (nth ast-results pos)
      (nth entry 2)))
  ast-cases)
|
||||
;; Register one hk-test per search case. The label is prefixed with
;; "search: ", followed by the batch result at the same position and then
;; the case's third element (the expected list of document ids).
;; NOTE(review): hk-test is defined elsewhere; argument order looks like
;; (name actual expected) -- confirm.
(map-indexed
  (fn (pos entry)
    (hk-test
      (str "search: " (nth entry 0))
      (nth search-results pos)
      (nth entry 2)))
  search-cases)
|
||||
|
||||
;; Final value of the file: a dict exposing hk-test-fail, hk-test-pass and
;; hk-test-fails under :fail, :pass and :fails.
;; NOTE(review): these names are defined outside this file; presumably the
;; fail count, pass count and collected failure details updated by hk-test
;; -- confirm.
{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails}
|
||||
Reference in New Issue
Block a user