|
1 |
| -(ns twitterbuzz.stats) |
| 1 | +(ns twitterbuzz.graph) |
2 | 2 |
|
3 | 3 | ;; code review from SDH:
|
4 | 4 | ;;
|
5 |
| -;; 1. I think inc-or-nil could be replaced by fnil - fixed ar |
| 5 | +;; 1. I think inc-or-nil could be replaced by fnil |
6 | 6 | ;; 2. update-retweet-count looks broken
|
7 |
| -;; `when` instead of `if` in function to be reduced over - fixed ar |
| 7 | +;; `when` instead of `if` in function to be reduced over |
8 | 8 | ;; 3. conversion from string keys to keywords should be
|
9 |
| -;; done already before we get here - fixed ar |
| 9 | +;; done already before we get here |
10 | 10 | ;; 4. fns like top-tweets should not take an n arg
|
11 |
| -;; consumers can always take what they want - fixed ar |
12 |
| -;; 5. namespace and file name do not match - fixed ar |
| 11 | +;; consumers can always take what they want |
| 12 | +;; 5. namespace and file name do not match |
| 13 | + |
| 14 | +(defn inc-or-1 |
| 15 | + "Returns 1 if x is nil, (inc x) otherwise" |
| 16 | + [x] |
| 17 | + (if (nil? x) 1 (inc x))) |
13 | 18 |
|
14 | 19 | (defn update-retweet-count
|
15 | 20 | "Updates the total count of retweets by id in m as indicated by the contents of
|
16 | 21 | the tweet t"
|
17 | 22 | [m t]
|
18 |
| - (let [rt-id (get-in t [:retweeted_status :id_str])] |
19 |
| - (if (nil? rt-id) m (update-in m [rt-id] (fnil inc 0))))) |
| 23 | + (let [rt-id (get-in t ["retweeted_status" "id_str"])] |
| 24 | + (when-not (nil? rt-id) (update-in m [rt-id] inc-or-1)))) |
20 | 25 |
|
21 | 26 | (defn top-tweets
|
22 |
| - "Returns the ids of the most retweeted tweets out of tweet seq ts in descending |
23 |
| - order" |
24 |
| - [ts] |
| 27 | + "Returns the ids of top n most retweeted tweets out of tweet seq ts" |
| 28 | + [ts n] |
25 | 29 | (let [retweet-counts (reduce update-retweet-count {} ts)
|
26 | 30 | tweet-ids (keys retweet-counts)]
|
27 |
| - (sort-by #(get retweet-counts %) tweet-ids))) |
28 |
| - |
29 |
| -(defn retweet-of |
30 |
| - "Returns the username of the user who originally authored the tweet |
31 |
| - that t is a retweet of, nil otherwise" |
32 |
| - [t] |
33 |
| - (get-in t [:retweeted_status :user :name])) |
34 |
| - |
35 |
| -(defn mentioned-users |
36 |
| - "Returns the usernames of all the users mentioned in tweet t" |
37 |
| - [t] |
38 |
| - (map #(get % :screen_name) (get-in t [:entities :user_mentions]))) |
39 |
| - |
40 |
| -(defn new-edges |
41 |
| - "Filter the seq of sets n to find all sets in n that do not already |
42 |
| - exist in the seq of sets o" |
43 |
| - [n o] |
44 |
| - (filter #(not (contains? o %)) n)) |
45 |
| - |
46 |
| -(defn edges |
47 |
| - "Return a lazy seq of pairs of usernames representing the social edges of a tweet |
48 |
| - stream, where a social edge is defined as one user retweeting or |
49 |
| - mentioning another user in a given tweet. Edges are not directional; if |
50 |
| - @alice retweets @bob, and @bob mentions @alice, those two tweets only create |
51 |
| - one edge." |
52 |
| - ([ts] |
53 |
| - (edges ts #{})) |
54 |
| - ([ts edges] |
55 |
| - (let [t (first ts) |
56 |
| - other-users (concat (list (retweet-of t)) (mentioned-users t)) |
57 |
| - author (get-in t [:user :name]) |
58 |
| - tweet-edges (set (map #(set (list author %)) other-users)) |
59 |
| - new-edges (new-edges tweet-edges edges)] |
60 |
| - (when t |
61 |
| - (if (empty? new-edges) |
62 |
| - (recur (rest ts) edges) |
63 |
| - (lazy-seq (concat new-edges (edges (rest ts) (concat new-edges edges))))))))) |
| 31 | + (take n (sort-by #(get retweet-counts %) tweet-ids)))) |
64 | 32 |
|
65 | 33 | (defn graph
|
66 | 34 | "Returns a map representing the graph of the tweet seq ts"
|
67 | 35 | [ts]
|
68 |
| - {:nodes {} |
| 36 | + {:nodes [] |
69 | 37 | :edges []})
|
0 commit comments