-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathget_timeline.py
76 lines (61 loc) · 2.06 KB
/
get_timeline.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# coding: utf-8
# -*- coding: utf-8 -*-
import urllib;
import twitter_gender;
import json;
import MeCab;
# ---------------------------------------------------------------------------
# Script-wide configuration and shared state.
# ---------------------------------------------------------------------------

# Neither of these two values is read anywhere else in the visible script.
append_flag = 1
twitter_id = "pinkroot"

# Endpoint queried once per account to fetch its timeline as JSON.
url = "http://api.twitter.com/1/statuses/user_timeline.json"

# Shared MeCab tagger; my_mecab() splits its output on single spaces.
tagger = MeCab.Tagger('-Owakati')

# Total number of tokens seen per gender key ("m" / "f").
count = {"m": 0, "f": 0}

# Per-gender word -> occurrence tables, plus a "trend" table that the
# reporting code fills with per-word ratio values.
word_count = {"m": {}, "f": {}, "trend": {}}

# Mapping iterated by the main loop as (account name, gender key) pairs.
# presumably the gender values are "m" / "f" -- verify in twitter_gender.
twitter_list = twitter_gender.get_genderlist()

# Output CSV handles; the main script body writes to and closes them.
fp = open("gender_Analysis_female.csv", "w")
mp = open("gender_Analysis_male.csv", "w")
trendp = open("gender_Analysis_trend.csv", "w")
def my_mecab(text):
    """Tokenise *text* with the module-level MeCab ``tagger``.

    The text is encoded to UTF-8 before being handed to the tagger, and the
    tagger's result is decoded from UTF-8 again.

    Returns the list of pieces obtained by splitting that result on single
    spaces.  Pieces are not stripped, so any line terminator the tagger
    emits stays in the list (the caller filters out ``"\\n"`` entries).
    """
    segmented = tagger.parse(text.encode('utf-8')).decode('utf-8')
    return segmented.split(' ')
# Main script body: fetch each listed account's timeline, count words per
# gender key, then write per-gender word ratios and a combined "trend" table
# to the three CSV files opened above.
#
# NOTE(review): the original indentation was lost; the nesting below is
# reconstructed from the statement order -- confirm against the author's copy.
try:
    # Pass 1: tokenise every tweet and tally words under the account's
    # gender key.
    for tid, gender in twitter_list.items():
        print(tid)
        param = {"screen_name": tid, "count": "200"}
        res = urllib.urlopen(url + "?" + urllib.urlencode(param))
        try:
            tweets_json = json.load(res)
        finally:
            # Release the HTTP response even if the payload is not valid JSON.
            res.close()

        gender_words = word_count[gender]
        for tweet_json in tweets_json:
            for word in my_mecab(tweet_json["text"]):
                count[gender] += 1
                gender_words[word] = gender_words.get(word, 0) + 1

    # Pass 2: female words, in ascending order of occurrence.  Only words
    # seen more than 10 times (and not the bare newline token) are reported;
    # their ratio also seeds the trend table.
    for key, value in sorted(word_count["f"].items(), key=lambda x: x[1]):
        print("%s:%d" % (key, value))
        if value > 10 and key != "\n":
            female_ratio = float(value) / count["f"]
            word_count["trend"][key] = female_ratio
            fp.write(("%s,%f\n" % (key, female_ratio)).encode("utf-8"))

    # Pass 3: male words, same threshold.  For a word already in the trend
    # table the male ratio is subtracted from the stored female ratio.
    for key, value in sorted(word_count["m"].items(), key=lambda x: x[1]):
        if value > 10 and key != "\n":
            male_ratio = float(value) / count["m"]
            mp.write(("%s,%f\n" % (key, male_ratio)).encode("utf-8"))
            if key in word_count["trend"]:
                word_count["trend"][key] -= male_ratio
            else:
                # NOTE(review): a word reported only for "m" is stored as
                # +male_ratio, whereas shared words hold female - male; the
                # sign looks inconsistent -- confirm the intended meaning.
                word_count["trend"][key] = male_ratio

    # Each per-gender file ends with that gender's total token count.
    fp.write("%d" % count["f"])
    mp.write("%d" % count["m"])

    # Trend table, ascending by value.  The original wrote through an
    # undefined name (mfp), which raised NameError; trendp is the handle
    # opened for gender_Analysis_trend.csv.
    for key, value in sorted(word_count["trend"].items(), key=lambda x: x[1]):
        trendp.write(("%s,%f\n" % (key, value)).encode("utf-8"))
finally:
    # Always flush and close the output files, even when a fetch fails.
    fp.close()
    mp.close()
    trendp.close()