; lib/gitea/search.sx — sx-gitea Phase 7: code + issue/PR search. ; ; search-on-sx (haskell-on-sx) does the heavy lifting: the forge builds a ; document corpus SX-side — code files from the default branch head ; (path + blob text), issues (title + body + comment bodies), PRs (title ; + body + review bodies) — embeds it as one Haskell program, and asks ; searchRankTfIdf for ranked doc ids. Queries speak the search query ; language (terms, AND/OR/NOT, "phrases"). ; ; Cost model: ONE evaluation parses the Haskell search layers (~seconds), ; extra queries are nearly free. So the core primitive is ; gitea/search-multi — ANY number of corpora and queries in a single ; evaluation (each corpus becomes an idxN binding) — and everything else ; sugars over it. Only the six layers searchRankTfIdf needs are compiled ; (tokenize/index/query/parse/rank/rankq), not the full search/src. ; ; Requires: lib/gitea/{repo,access,web,issues,pr}.sx and their stacks, ; lib/haskell/* + lib/search/{tokenize,index,query,parse,rank,rankq, ; testlib}.sx (search-hk->sx, hk-core, hk-eval-program). ; ── corpus ─────────────────────────────────────────────────────────── (define gitea/code-docs (fn (forge owner name) (let ((rec (gitea/repo-get forge owner name))) (if (nil? rec) (list) (let ((grepo (gitea/repo-git forge owner name))) (let ((head-cid (git/branch-get grepo (get rec :default-branch)))) (if (nil? head-cid) (list) (let ((files (git/commit-files grepo head-cid))) (map (fn (p) {:text (str p " " (get files p)) :kind "code" :ref p}) (artdag/sort-strings (keys files))))))))))) (define gitea/issue-docs (fn (forge owner name) (map (fn (r) {:text (str (get r :title) " " (get r :body) " " (join " " (map (fn (c) (get c :body)) (get r :comments)))) :kind "issue" :ref (str (get r :number))}) (gitea/issue-records forge owner name)))) (define gitea/pr-search-docs (fn (forge owner name) (map (fn (r) {:text (str (get r :title) " " (get r :body) " " (join " " (map (fn (v) (get v :body)) (get r :reviews)))) :kind "pr" :ref (str (get r :number))}) (gitea/pr-records forge owner name)))) (define gitea/search-kinds-all (list "code" "issue" "pr")) (define gitea/repo-docs (fn (forge owner name kinds) (filter (fn (d) (contains? kinds (get d :kind))) (concat (gitea/code-docs forge owner name) (concat (gitea/issue-docs forge owner name) (gitea/pr-search-docs forge owner name)))))) (define gitea/visible-docs (fn (forge user kinds) (reduce (fn (acc full) (let ((p (gitea/split-full full))) (concat acc (map (fn (d) (assoc d :repo full)) (gitea/repo-docs forge (get p :owner) (get p :name) kinds))))) (list) (gitea/visible-repos forge user)))) ; ── haskell program assembly ───────────────────────────────────────── ; only the layers searchRankTfIdf needs — parsing is the dominant cost (define gitea/search-src (str search/tokenize-src "\n" search/index-src "\n" search/query-src "\n" search/parse-src "\n" search/rank-src "\n" search/rankq-src "\n")) (define gitea/hk-escape (fn (s) (replace (replace (replace (replace (or s "") "\\" "\\\\") "\"" "\\\"") "\n" "\\n") "\t" "\\t"))) ; = indexDoc k "text_k" ( ... (indexDoc 1 "text_1" emptyIndex)) (define gitea/index-binding (fn (idxname docs) (str idxname " = " (reduce (fn (acc pair) (str "indexDoc " (first pair) " \"" (gitea/hk-escape (get (nth pair 1) :text)) "\" (" acc ")")) "emptyIndex" (map-indexed (fn (i d) (list (+ i 1) d)) docs)) "\n"))) (define gitea/hk-search-eval (fn (extra) (search-hk->sx (hk-deep-force (get (hk-eval-program (hk-core (str gitea/search-src extra))) "result"))))) ; THE core: corpora = (docs ...), specs = ({:corpus i :query q :n n} ...). ; One haskell evaluation; => one ranked {:kind :ref (:repo)} list per spec. (define gitea/search-multi (fn (corpora specs) (let ((bindings (join "" (map-indexed (fn (i docs) (gitea/index-binding (str "idx" i) docs)) corpora))) (exprs (map (fn (s) (if (empty? (nth corpora (get s :corpus))) "[]" (str "take " (get s :n) " (searchRankTfIdf \"" (gitea/hk-escape (get s :query)) "\" idx" (get s :corpus) ")"))) specs))) (let ((id-lists (gitea/hk-search-eval (str bindings "result = [" (join ", " exprs) "]\n")))) (map-indexed (fn (si ids) (let ((docs (nth corpora (get (nth specs si) :corpus)))) (map (fn (i) (let ((d (nth docs (- i 1)))) (if (get d :repo) {:repo (get d :repo) :kind (get d :kind) :ref (get d :ref)} {:kind (get d :kind) :ref (get d :ref)}))) ids))) id-lists))))) ; many queries against ONE corpus, still one evaluation (define gitea/search-many (fn (docs queries n) (if (empty? docs) (map (fn (q) (list)) queries) (gitea/search-multi (list docs) (map (fn (q) {:n n :query q :corpus 0}) queries))))) ; ── repo-scoped search ─────────────────────────────────────────────── (define gitea/search-repo (fn (forge owner name query kinds n) (first (gitea/search-many (gitea/repo-docs forge owner name kinds) (list query) n)))) (define gitea/search-code (fn (forge owner name query n) (gitea/search-repo forge owner name query (list "code") n))) (define gitea/search-issues (fn (forge owner name query n) (gitea/search-repo forge owner name query (list "issue") n))) (define gitea/search-prs (fn (forge owner name query n) (gitea/search-repo forge owner name query (list "pr") n))) ; ── global search over visible repos ───────────────────────────────── (define gitea/search-visible (fn (forge user query kinds n) (first (gitea/search-many (gitea/visible-docs forge user kinds) (list query) n)))) ; ── web ────────────────────────────────────────────────────────────── (define gitea/w-query-param (fn (req name) (get (get req :query) name))) (define gitea/w-search-kinds (fn (req) (let ((k (gitea/w-query-param req "kind"))) (if (contains? gitea/search-kinds-all k) (list k) gitea/search-kinds-all)))) (define gitea/w-search-item (fn (owner name r) (let ((kind (get r :kind)) (ref (get r :ref))) (cond ((= kind "code") (str "
  • " (dream-escape ref) "
  • ")) ((= kind "issue") (str "
  • #" ref "
  • ")) (else (str "
  • #" ref "
  • ")))))) (define gitea/w-search-page (fn (forge req) (let ((owner (dream-param req "owner")) (name (dream-param req "name")) (q (gitea/w-query-param req "q"))) (cond ((not (gitea/w-readable? forge req owner name)) (dream-not-found)) ((or (nil? q) (= q "")) (gitea/w-json-status 400 {:error "missing-query"})) (else (gitea/w-page (str "search: " q) (str "

    Search

    " (dream-escape q) "

    "))))))) (define gitea/w-api-search-repo (fn (forge req) (let ((owner (dream-param req "owner")) (name (dream-param req "name")) (q (gitea/w-query-param req "q"))) (cond ((not (gitea/w-readable? forge req owner name)) (dream-not-found)) ((or (nil? q) (= q "")) (gitea/w-json-status 400 {:error "missing-query"})) (else (dream-json-value (gitea/search-repo forge owner name q (gitea/w-search-kinds req) 20))))))) (define gitea/w-api-search (fn (forge req) (let ((q (gitea/w-query-param req "q"))) (if (or (nil? q) (= q "")) (gitea/w-json-status 400 {:error "missing-query"}) (dream-json-value (gitea/search-visible forge (gitea/w-user forge req) q (gitea/w-search-kinds req) 20)))))) (define gitea/search-routes (fn (forge) (list (dream-get "/:owner/:name/search" (fn (req) (gitea/w-search-page forge req))) (dream-get "/api/repos/:owner/:name/search" (fn (req) (gitea/w-api-search-repo forge req))) (dream-get "/api/search" (fn (req) (gitea/w-api-search forge req)))))) (set! gitea/route-packs (append gitea/route-packs (list gitea/search-routes)))