Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 46s
Query tokenizer + recursive-descent parser: OR<AND<NOT precedence, implicit AND on adjacency, quoted phrases, parens, case-insensitive keywords. parseQuery, searchQuery, showQ. Worked around haskell-on-sx parser limits (ord-based delimiters; multi-clause fns instead of []-pattern case alts). 78/78. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
19 lines
3.3 KiB
Plaintext
19 lines
3.3 KiB
Plaintext
;; search query parser — Haskell source fragment. Depends on tokenize + query.
|
|
;; Grammar (precedence OR < AND < NOT):
|
|
;; expr = orExpr
|
|
;; orExpr = andExpr (OR andExpr)*
|
|
;; andExpr= notExpr ((AND | <implicit>) notExpr)* -- adjacency means AND
|
|
;; notExpr= NOT notExpr | atom
|
|
;; atom = '(' expr ')' | '"' word+ '"' | word
|
|
;; Keywords AND/OR/NOT are case-insensitive; bare words are normalized via tokens.
|
|
;; Gotchas: delimiters matched by ord (escaped char literals like '\"' break the
|
|
;; haskell-on-sx tokenizer); an [] *pattern* inside a `case` alt also breaks the
|
|
;; parser, so qNormTerm/qDropRP/showQ are written as multi-clause functions.
|
|
;; parseQuery :: String -> Query
|
|
;; searchQuery :: String -> Index -> [DocId]
|
|
;; showQ :: Query -> String -- canonical render for tests/debug
|
|
|
|
(define
|
|
search/parse-src
|
|
"data QTok = TAnd | TOr | TNot | TLP | TRP | TWord String | TPhrase [String]\nqIsSpace c = ord c == 32\nqIsLP c = ord c == 40\nqIsRP c = ord c == 41\nqIsQuote c = ord c == 34\nqDelim c = qIsSpace c || qIsLP c || qIsRP c || qIsQuote c\nqReadWord [] = ([], [])\nqReadWord (c:cs) = if qDelim c then ([], c:cs) else let (w, rest) = qReadWord cs in (c:w, rest)\nqReadPhrase [] = ([], [])\nqReadPhrase (c:cs) = if qIsQuote c then ([], cs) else let (w, rest) = qReadPhrase cs in (c:w, rest)\ntoUpperCh c = chr (toUpper (ord c))\nqUpper w = joinChars (map toUpperCh w)\nqFirstTok [] = \"\"\nqFirstTok (x:xs) = x\nqNormTerm w = qFirstTok (tokens w)\nqClassify w = if qUpper w == \"AND\" then TAnd else if qUpper w == \"OR\" then TOr else if qUpper w == \"NOT\" then TNot else TWord (qNormTerm w)\nqPhraseTok cs = let (p, rest) = qReadPhrase cs in TPhrase (tokens p) : qtokens rest\nqWordTok cs = let (w, rest) = qReadWord cs in qClassify w : qtokens rest\nqtokens [] = []\nqtokens (c:cs) = if qIsSpace c then qtokens cs else if qIsLP c then TLP : qtokens cs else if qIsRP c then TRP : qtokens cs else if qIsQuote c then qPhraseTok cs else qWordTok (c:cs)\nqDropRP (q, (TRP:rest)) = (q, rest)\nqDropRP (q, ts) = (q, ts)\nparseAtom [] = (Term \"\", [])\nparseAtom (TLP:ts) = qDropRP (parseExpr ts)\nparseAtom (TPhrase ps : ts) = (Phrase ps, ts)\nparseAtom (TWord w : ts) = (Term w, ts)\nparseAtom ts = (Term \"\", ts)\nqWrapNot (q, ts) = (Not q, ts)\nparseNot (TNot:ts) = qWrapNot (parseNot ts)\nparseNot ts = parseAtom ts\nqStartsAtom (TWord w : ts) = True\nqStartsAtom (TPhrase p : ts) = True\nqStartsAtom (TLP : ts) = True\nqStartsAtom (TNot : ts) = True\nqStartsAtom ts = False\nqAndStep left ts = let (r, rest) = parseNot ts in parseAndR (And left r) rest\nparseAndR left (TAnd:ts) = qAndStep left ts\nparseAndR left ts = if qStartsAtom ts then qAndStep left ts else (left, ts)\nparseAnd ts = let (l, rest) = parseNot ts in parseAndR l rest\nparseOrR left (TOr:ts) = let (r, rest) = parseAnd ts in parseOrR (Or left r) rest\nparseOrR left ts = (left, ts)\nparseExpr ts = let (l, rest) = parseAnd ts in parseOrR l rest\nparseQuery s = fst (parseExpr (qtokens s))\nsearchQuery s idx = evalQuery idx (parseQuery s)\njoinSp [] = \"\"\njoinSp [x] = x\njoinSp (x:xs) = x ++ \"-\" ++ joinSp xs\nshowQ (Term t) = \"T:\" ++ t\nshowQ (And a b) = \"(\" ++ showQ a ++ \" & \" ++ showQ b ++ \")\"\nshowQ (Or a b) = \"(\" ++ showQ a ++ \" | \" ++ showQ b ++ \")\"\nshowQ (Not a) = \"!\" ++ showQ a\nshowQ (Phrase ts) = \"P:\" ++ joinSp ts\n")
|