; feed/content — TF-IDF relevance scoring over activity :tags.
;
; A tag's weight grows as fewer activities carry it, so an activity matching
; an uncommon tag ranks above one matching a common tag. feed/tfidf-score
; produces an ordinary scorer, so it composes with rank.sx like any other.
;
; Requires: lib/feed/normalize.sx, lib/feed/stream.sx, lib/feed/fanout.sx
; (feed/-distinct), lib/feed/rank.sx (feed/rank).

; feed/tag-df — document-frequency table for a stream.
;   stream : passed to feed/items to obtain the activities
;   returns: dict of tag -> number of activities whose :tags contain the tag
; Each activity's tags go through feed/-distinct first, so a tag repeated
; inside one activity adds only 1 to its count.
(define feed/tag-df
  (fn (stream)
    (let ((bump (fn (counts tag)
                  (assoc counts tag (+ (get counts tag 0) 1)))))
      (reduce
        (fn (counts activity)
          (reduce bump counts (feed/-distinct (get activity :tags))))
        {}
        (feed/items stream)))))

; feed/tag-idf — inverse-document-frequency table for a stream.
;   returns: dict of tag -> log(N / df), where N is (feed/count stream) and
;            df is the tag's entry in (feed/tag-df stream)
; Only tags present in the df table get an entry.
(define feed/tag-idf
  (fn (stream)
    (let ((total (feed/count stream))
          (freqs (feed/tag-df stream)))
      (reduce
        (fn (weights tag)
          (let ((ratio (/ total (get freqs tag))))
            (assoc weights tag (log ratio))))
        {}
        (keys freqs)))))

; feed/-tf — term frequency inside a single activity.
;   a      : activity; its :tags entry is read
;   returns: dict of tag -> number of times the tag occurs in :tags
; Unlike feed/tag-df, repeats are NOT collapsed here.
(define feed/-tf
  (fn (a)
    (let ((tags (get a :tags)))
      (reduce
        (fn (counts tag)
          (assoc counts tag (+ (get counts tag 0) 1)))
        {}
        tags))))

; feed/tfidf-score — build a scorer for a query against a precomputed idf.
;   idf    : dict of tag -> idf weight (e.g. from feed/tag-idf)
;   query  : list of tags
;   returns: (fn (a) ...) giving the sum, over the query tags, of
;            tf(tag in a) * idf(tag)
; A tag missing from either table contributes 0 (both lookups default to 0).
; A tag listed more than once in query is added once per occurrence.
(define feed/tfidf-score
  (fn (idf query)
    (fn (a)
      (let ((counts (feed/-tf a)))
        (reduce
          (fn (total tag)
            (let ((weight (* (get counts tag 0) (get idf tag 0))))
              (+ total weight)))
          0
          query)))))

; feed/by-relevance — rank a stream by relevance to the query tags.
; The idf table is computed from the very stream being ranked, then handed
; to feed/rank together with the resulting scorer.
(define feed/by-relevance
  (fn (stream query)
    (let ((idf (feed/tag-idf stream)))
      (let ((scorer (feed/tfidf-score idf query)))
        (feed/rank stream scorer)))))