-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathcrawlFirehose.py
59 lines (48 loc) · 1.79 KB
/
crawlFirehose.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import json
import os
import sys
from time import localtime, strftime, sleep
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
# Twitter API user credentials.
# Read from "credentials.json" in the current working directory; the file
# must contain the four keys looked up below. The ``with`` block guarantees
# the file handle is closed as soon as the JSON has been parsed.
with open("credentials.json") as credentials_file:
    vault = json.load(credentials_file)

TWITTER_ACCESS_TOKEN = vault["TWITTER_ACCESS_TOKEN"]
TWITTER_TOKEN_SECRET = vault["TWITTER_TOKEN_SECRET"]
TWITTER_CONSUMER_KEY = vault["TWITTER_CONSUMER_KEY"]
TWITTER_CONSUMER_SECRET = vault["TWITTER_CONSUMER_SECRET"]
def show_message(message):
    """Write ``message`` to stdout behind a local-time stamp, then flush.

    The emitted line has the form ``[YYYY-MM-DD HH:MM:SS] <message>`` and
    is terminated by a newline. ``message`` must be a string, because it is
    concatenated with the trailing newline.
    """
    timestamp = strftime("%Y-%m-%d %H:%M:%S", localtime())
    out = sys.stdout
    out.write("[%s] " % timestamp)
    out.write(message + "\n")
    # Flush explicitly so the line appears right away even when stdout is
    # buffered (e.g. redirected to a file or pipe).
    out.flush()
class StdOutListener(StreamListener):
    """Stream listener that echoes every raw payload to stdout.

    Two counters drive the rate-limit handling:

    * ``_420`` -- number of 420 errors seen since the last reset; the sleep
      in ``on_error`` is ``60 * _420`` seconds, so it grows linearly.
    * ``_okay_count`` -- payloads received since the last 420 error; once it
      reaches 200 the ``_420`` counter is reset to zero.
    """

    def __init__(self):
        # Run the base-class initialiser so that any state it sets up is
        # present on this instance as well.
        super(StdOutListener, self).__init__()
        self._420 = 0
        self._okay_count = 0

    def on_data(self, data):
        """Print one raw payload and update the success counter.

        Always returns True (presumably signalling the stream to keep
        running -- confirm against the tweepy version in use).
        """
        # Single-argument call form: prints the same text under Python 2's
        # print statement and is also valid Python 3 syntax.
        print(data)
        self._okay_count += 1
        if self._okay_count >= 200:
            # Enough successful payloads since the last 420: forget the
            # accumulated penalty.
            self._420 = 0
        return True

    def on_error(self, status_code):
        """Log ``status_code``; on a 420, sleep for a linearly growing time.

        Returns None implicitly.
        NOTE(review): whether the stream reconnects by itself after a
        non-False return depends on the tweepy version -- confirm.
        """
        show_message("Error code: %d" % status_code)
        if status_code == 420:
            self._okay_count = 0
            self._420 += 1
            delay = 60 * self._420
            show_message("Sleeping %d seconds before restart..." % delay)
            sleep(delay)
if __name__ == '__main__':
    # Twitter authentication
    auth = OAuthHandler(TWITTER_CONSUMER_KEY, TWITTER_CONSUMER_SECRET)
    auth.set_access_token(TWITTER_ACCESS_TOKEN, TWITTER_TOKEN_SECRET)

    listener = StdOutListener()
    stream = Stream(auth, listener)

    # Bounding box handed to the streaming filter. It never changes, so it
    # is built once instead of on every loop iteration.
    # presumably [west lon, south lat, east lon, north lat] -- confirm.
    LOCATIONS = [-124.85, 24.39, -66.88, 49.38]

    # Pause after an exception so that a persistent failure (network down,
    # rejected credentials, ...) does not turn into a hot reconnect loop.
    RESTART_DELAY_SECONDS = 5

    while True:
        try:
            show_message('(Re)start streaming...')
            stream.filter(locations=LOCATIONS)
        except Exception as e:
            # Keep the crawler running without being interrupted by
            # exceptions. KeyboardInterrupt is not an Exception subclass,
            # so Ctrl-C still stops the script.
            show_message("Exception: " + str(e))
            sleep(RESTART_DELAY_SECONDS)