feed: TF-IDF content ranking over :tags — tag-df/idf, tfidf-score, by-relevance + 15 tests
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 43s
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 43s
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
68
lib/feed/content.sx
Normal file
68
lib/feed/content.sx
Normal file
@@ -0,0 +1,68 @@
|
||||
; feed/content — TF-IDF relevance over activity :tags. Rare tags carry more
|
||||
; signal, so an activity matching an uncommon tag ranks above one matching a
|
||||
; common tag. Composes with rank.sx: feed/tfidf-score is just another scorer.
|
||||
;
|
||||
; Requires: lib/feed/normalize.sx, lib/feed/stream.sx, lib/feed/fanout.sx
|
||||
; (feed/-distinct), lib/feed/rank.sx (feed/rank).
|
||||
|
||||
; feed/tag-df — document frequency table for a stream.
; Returns a dict: tag -> number of activities whose :tags list that tag.
; Each activity's tags go through feed/-distinct before counting, so a tag
; repeated inside one activity adds 1 to its df, not its repeat count.
(define feed/tag-df
  (fn (stream)
    (reduce
      (fn (counts activity)
        (let ((tags (feed/-distinct (get activity :tags))))
          ; fold this activity's tags into the running table; a tag not yet
          ; in the table starts from 0
          (reduce
            (fn (acc tag) (assoc acc tag (+ 1 (get acc tag 0))))
            counts
            tags)))
      {}
      (feed/items stream))))
|
||||
|
||||
; feed/tag-idf — inverse document frequency table for a stream.
; Returns a dict: tag -> log(N / df), where N is (feed/count stream) and df is
; the tag's entry in (feed/tag-df stream). Only tags present in the df table
; get an entry, so the fewer activities carry a tag, the larger its value.
(define feed/tag-idf
  (fn (stream)
    (let ((total (feed/count stream))
          (doc-freq (feed/tag-df stream)))
      (reduce
        (fn (weights tag)
          ; tag comes from (keys doc-freq), so the lookup needs no default
          (let ((seen-in (get doc-freq tag)))
            (assoc weights tag (log (/ total seen-in)))))
        {}
        (keys doc-freq)))))
|
||||
|
||||
; feed/-tf — term frequency table for a single activity.
; Returns a dict: tag -> how many times that tag appears in the activity's
; :tags. Unlike the df table, repeats are counted here.
(define feed/-tf
  (fn (a)
    (let ((tags (get a :tags)))
      (reduce
        ; a tag seen for the first time starts from 0
        (fn (counts tag) (assoc counts tag (+ 1 (get counts tag 0))))
        {}
        tags))))
|
||||
|
||||
; feed/tfidf-score — build a scorer for a query (list of tags) from a
; precomputed idf table.
;   idf   : dict tag -> idf weight (e.g. the result of feed/tag-idf)
;   query : list of tags to match against
; Returns a one-argument fn over an activity. Its result is the sum, over the
; query tags, of tf(tag in activity) * idf(tag). A tag missing from either
; table contributes 0.
(define feed/tfidf-score
  (fn (idf query)
    (fn (activity)
      (let ((counts (feed/-tf activity)))
        (reduce
          (fn (total tag)
            (let ((freq (get counts tag 0))
                  (weight (get idf tag 0)))
              (+ total (* freq weight))))
          0
          query)))))
|
||||
|
||||
; feed/by-relevance — rank a stream by relevance to the query tags.
; The idf table is computed from the same stream that is being ranked, then
; handed to feed/rank as a feed/tfidf-score scorer.
(define feed/by-relevance
  (fn (stream query)
    (let ((idf (feed/tag-idf stream)))
      (feed/rank stream (feed/tfidf-score idf query)))))
|
||||
Reference in New Issue
Block a user