Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 43s
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
69 lines
1.7 KiB
Plaintext
69 lines
1.7 KiB
Plaintext
; feed/content — TF-IDF relevance over activity :tags. Rare tags carry more
|
|
; signal, so an activity matching an uncommon tag ranks above one matching a
|
|
; common tag. Composes with rank.sx: feed/tfidf-score is just another scorer.
|
|
;
|
|
; Requires: lib/feed/normalize.sx, lib/feed/stream.sx, lib/feed/fanout.sx
|
|
; (feed/-distinct), lib/feed/rank.sx (feed/rank).
|
|
|
|
; feed/tag-df — document frequency table for a stream.
;   stream : passed to feed/items to obtain the activities
;   returns: dict of tag -> number of activities whose :tags include that tag
; Each activity's tags go through feed/-distinct before counting, so a tag
; repeated inside one activity adds only 1 to its document frequency.
; NOTE(review): assumes every activity has a :tags collection that
; feed/-distinct accepts — confirm against lib/feed/normalize.sx.
(define feed/tag-df
  (fn (stream)
    (let ((bump (fn (counts tag)
                  ; missing tags start from 0
                  (assoc counts tag (+ (get counts tag 0) 1)))))
      (reduce
        (fn (counts activity)
          (reduce bump counts (feed/-distinct (get activity :tags))))
        {}
        (feed/items stream)))))
|
|
|
|
; feed/tag-idf — inverse document frequency table for a stream.
;   stream : passed to feed/count (corpus size) and feed/tag-df (per-tag df)
;   returns: dict of tag -> (log (/ n df)), one entry per key of the df table
; Only tags present in the df table are visited, so every lookup finds a df.
; NOTE(review): a tag whose df equals n gets log(1); this presumes feed/count
; matches the number of activities feed/tag-df iterated — confirm.
(define feed/tag-idf
  (fn (stream)
    (let ((total (feed/count stream))
          (doc-freq (feed/tag-df stream)))
      (reduce
        (fn (weights tag)
          (let ((ratio (/ total (get doc-freq tag))))
            (assoc weights tag (log ratio))))
        {}
        (keys doc-freq)))))
|
|
|
|
; feed/-tf — term frequency within a single activity.
;   a      : activity; its :tags value is folded over
;   returns: dict of tag -> number of times the tag occurs in (get a :tags)
; Unlike document frequency, duplicates are NOT collapsed here: a tag listed
; twice in one activity yields a count of 2.
; NOTE(review): assumes :tags is present and reducible — confirm upstream.
(define feed/-tf
  (fn (a)
    (let ((tally (fn (counts tag)
                   ; missing tags start from 0
                   (assoc counts tag (+ (get counts tag 0) 1)))))
      (reduce tally {} (get a :tags)))))
|
|
|
|
; feed/tfidf-score — build a scorer for a fixed idf table and query.
;   idf    : dict of tag -> idf weight (e.g. the result of feed/tag-idf)
;   query  : list of tags to match against
;   returns: a one-argument fn taking an activity and returning
;            sum over query tags of tf(tag in activity) * idf(tag)
; A query tag absent from the activity or from the idf table contributes 0,
; because both lookups default to 0. The query is folded as given, so a tag
; listed twice in the query is added twice.
(define feed/tfidf-score
  (fn (idf query)
    (fn (activity)
      (let ((counts (feed/-tf activity)))
        (reduce
          (fn (total tag)
            (let ((weight (* (get counts tag 0) (get idf tag 0))))
              (+ total weight)))
          0
          query)))))
|
|
|
|
; feed/by-relevance — rank a stream by TF-IDF relevance to a list of query tags.
;   stream : the stream to rank; also the corpus the idf table is computed from
;   query  : list of tags
;   returns: whatever feed/rank returns for (stream, scorer)
; NOTE(review): ordering direction and tie handling are decided by feed/rank
; in lib/feed/rank.sx, which is not visible here.
(define feed/by-relevance
  (fn (stream query)
    (let ((scorer (feed/tfidf-score (feed/tag-idf stream) query)))
      (feed/rank stream scorer))))
|