Files
rose-ash/lib/feed/content.sx
giles a979297959
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 43s
feed: TF-IDF content ranking over :tags — tag-df/idf, tfidf-score, by-relevance + 15 tests
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-06 16:50:36 +00:00

69 lines
1.7 KiB
Plaintext

; feed/content — TF-IDF relevance over activity :tags. Rare tags carry more
; signal, so an activity matching an uncommon tag ranks above one matching a
; common tag. Composes with rank.sx: feed/tfidf-score is just another scorer.
;
; Requires: lib/feed/normalize.sx, lib/feed/stream.sx, lib/feed/fanout.sx
; (feed/-distinct), lib/feed/rank.sx (feed/rank).
; document frequency: tag -> number of activities whose :tags contain it
; (a tag repeated within one activity counts once toward df)
(define
feed/tag-df
(fn
(stream)
(reduce
(fn
(df a)
(reduce
(fn (d t) (assoc d t (+ (get d t 0) 1)))
df
(feed/-distinct (get a :tags))))
{}
(feed/items stream))))
; inverse document frequency: tag -> log(N / df)
(define
feed/tag-idf
(fn
(stream)
(let
((n (feed/count stream)) (df (feed/tag-df stream)))
(reduce
(fn (idf t) (assoc idf t (log (/ n (get df t)))))
{}
(keys df)))))
; term frequency within one activity: tag -> occurrence count
(define
feed/-tf
(fn
(a)
(reduce
(fn (tf t) (assoc tf t (+ (get tf t 0) 1)))
{}
(get a :tags))))
; relevance of an activity to a query (list of tags) given precomputed idf:
; sum over query tags of tf(tag in activity) * idf(tag in corpus)
(define
feed/tfidf-score
(fn
(idf query)
(fn
(a)
(let
((tf (feed/-tf a)))
(reduce
(fn
(acc t)
(+ acc (* (get tf t 0) (get idf t 0))))
0
query)))))
; rank a stream by relevance to query tags (idf computed over the stream itself)
(define
feed/by-relevance
(fn
(stream query)
(feed/rank stream (feed/tfidf-score (feed/tag-idf stream) query))))