Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 43s
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
86 lines
2.5 KiB
Plaintext
86 lines
2.5 KiB
Plaintext
; Follow-up — TF-IDF content ranking over :tags.
; Test form used throughout: (feed-test name got expected)

; Fixture corpus: a stream of four normalized items, all with actor "u".
; Tag distribution across the items:
;   "cats"     -> o1, o2, o4  (3 items)
;   "news"     -> o2, o3      (2 items)
;   "funny"    -> o1          (1 item)
;   "politics" -> o3          (1 item)
; :at grows with the object number (10, 20, 30, 40).
(define
  corpus
  (feed/stream
    (list
      (feed/normalize {:actor "u" :object "o1" :at 10 :tags (list "cats" "funny")})
      (feed/normalize {:actor "u" :object "o2" :at 20 :tags (list "cats" "news")})
      (feed/normalize {:actor "u" :object "o3" :at 30 :tags (list "politics" "news")})
      (feed/normalize {:actor "u" :object "o4" :at 40 :tags (list "cats")}))))

; ---------- document frequency ----------

; Per-tag lookups into (feed/tag-df corpus): each expected value is the number
; of corpus items that carry the tag.
(feed-test "df cats" (get (feed/tag-df corpus) "cats") 3)
(feed-test "df news" (get (feed/tag-df corpus) "news") 2)
(feed-test "df funny" (get (feed/tag-df corpus) "funny") 1)
(feed-test "df politics" (get (feed/tag-df corpus) "politics") 1)
; Whole-table comparison against a map literal.
; NOTE(review): the lookups above use string keys while this literal uses
; keyword keys — presumably the two are interchangeable as map keys; confirm.
(feed-test "df full" (feed/tag-df corpus) {:news 2 :funny 1 :politics 1 :cats 3})

; ---------- inverse document frequency ----------

; Per the test names, the expected idf of a tag is log(N / df) with N = 4
; corpus items: "news" (df 2) -> log 2, "funny" (df 1) -> log 4.
; NOTE(review): expected values are produced with (log ...) and compared to the
; computed idf directly — assumes feed-test's comparison accepts these floats
; as equal; confirm.
(feed-test
  "idf news = log(4/2)"
  (get (feed/tag-idf corpus) "news")
  (log 2))

(feed-test
  "idf funny = log(4/1)"
  (get (feed/tag-idf corpus) "funny")
  (log 4))

; Ordering check only: "funny" (1 item) must score above "cats" (3 items).
(feed-test
  "rarer tag has higher idf"
  (>
    (get (feed/tag-idf corpus) "funny")
    (get (feed/tag-idf corpus) "cats"))
  true)

; ---------- tf-idf scoring ----------

; idf table for the fixture corpus, shared by the scoring tests below.
(define idf (feed/tag-idf corpus))

; (feed/tfidf-score idf query-tags) is called for its result, which is then
; applied to a single normalized item to obtain that item's score.

; Item tagged ("cats" "funny") — the same tags as o1 — queried for "funny":
; expected score is log 4, the idf expected for "funny".
(feed-test
  "score query funny on o1"
  ((feed/tfidf-score idf (list "funny")) (feed/normalize {:actor "u" :object "x" :tags (list "cats" "funny")}))
  (log 4))

; Item without the queried tag scores 0.
(feed-test
  "score query funny on non-match"
  ((feed/tfidf-score idf (list "funny")) (feed/normalize {:actor "u" :object "x" :tags (list "cats")}))
  0)

; Query tag "zzz" appears on no corpus item; the score is still expected to be 0.
(feed-test
  "unknown query tag scores 0"
  ((feed/tfidf-score idf (list "zzz")) (feed/normalize {:actor "u" :object "x" :tags (list "cats")}))
  0)

; ---------- ranking by relevance ----------

; query news: o2 and o3 match (score log 2); o1 and o4 do not (score 0);
; ties break by :at, descending
; Reduce the ranked items to their :object ids and compare the full ordering.
(feed-test
  "by-relevance news order"
  (map
    (fn (item) (get item :object))
    (feed/items (feed/by-relevance corpus (list "news"))))
  (list "o3" "o2" "o4" "o1"))

; query funny: only o1 matches, so it ranks first
; Only the first ranked item is inspected; the rest of the order is unchecked.
(feed-test
  "by-relevance funny first"
  (get
    (nth (feed/items (feed/by-relevance corpus (list "funny"))) 0)
    :object)
  "o1")

; query (cats news): o2 carries both tags, so it has the highest combined tf-idf
; Two-tag query: only the top-ranked item's :object is checked.
(feed-test
  "by-relevance cats+news top"
  (get
    (nth
      (feed/items (feed/by-relevance corpus (list "cats" "news")))
      0)
    :object)
  "o2")

; Ranking must keep every item: the corpus has 4 items and 4 are expected back,
; including o3, which does not carry the queried tag.
(feed-test
  "by-relevance preserves count"
  (feed/count (feed/by-relevance corpus (list "cats")))
  4)