-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtweet_scrape.py
86 lines (66 loc) · 2.12 KB
/
tweet_scrape.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import json
import os
import sys
import time

import pandas as pd
import tweepy
from tweepy import OAuthHandler

from json_parse import write_json_tweets
def load_api(consumer_key='', consumer_secret='', access_token='',
             access_secret=''):
    """Build a tweepy API client from a set of OAuth credentials.

    The credentials used to be hard-coded as blank strings inside the
    function. They are now keyword parameters whose defaults are those same
    blank strings, so a bare ``load_api()`` call behaves exactly as before.

    Args:
        consumer_key: Application consumer key.
        consumer_secret: Application consumer secret.
        access_token: OAuth access token.
        access_secret: OAuth access token secret.

    Returns:
        A ``tweepy.API`` instance wrapping an ``OAuthHandler`` configured
        with the given credentials.
    """
    # NOTE(review): blank credentials presumably cannot authenticate against
    # Twitter; callers should pass real values - confirm.
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_secret)
    return tweepy.API(auth)
def tweet_search(api, query, max_tweets):
    """Collect up to ``max_tweets`` tweets matching ``query``.

    Pages backwards through the search results: after each page the id of
    the oldest tweet received becomes the upper bound (``max_id``) of the
    next request, so successive requests do not return the same tweets.

    Args:
        api: Object exposing a tweepy-style ``search(**params)`` method that
            returns a sequence of tweets, each with an ``id`` attribute.
        query: Search string passed as the ``q`` parameter.
        max_tweets: Maximum number of tweets to return.

    Returns:
        A list of at most ``max_tweets`` tweet objects. The list may be
        shorter if the search runs dry or a ``tweepy.TweepError`` is raised.
    """
    searched_tweets = []
    max_id = None  # no upper bound on the first request
    while len(searched_tweets) < max_tweets:
        remaining_tweets = max_tweets - len(searched_tweets)
        params = {
            'q': query,
            'count': remaining_tweets,
            'tweet_mode': 'extended',
            # The search endpoint filters by ``lang`` with an ISO 639-1
            # code; the previous ``language='English'`` keyword is not a
            # parameter of that endpoint.
            'lang': 'en',
        }
        if max_id is not None:
            params['max_id'] = max_id
        try:
            new_tweets = api.search(**params)
        except tweepy.TweepError:
            print('exception raised, waiting 15 minutes')
            time.sleep(15*60)
            # Give up on this query after the wait, as the original did.
            break
        print('found', len(new_tweets), 'tweets')
        if not new_tweets:
            print('no tweets found')
            break
        searched_tweets.extend(new_tweets)
        # Next page: strictly older than the oldest tweet seen so far.
        max_id = new_tweets[-1].id - 1
    return searched_tweets
def write_tweets(filename, tweets):
    """Append the full text of each tweet to ``tweets.txt``, one per line.

    Args:
        filename: Unused. Output always goes to ``tweets.txt`` in the
            current working directory; the parameter is kept so existing
            callers continue to work.
        tweets: Iterable of objects whose ``_json`` attribute is a mapping
            containing a ``'full_text'`` entry.
    """
    # Tweet text routinely contains emoji and non-Latin characters, which a
    # platform-default encoding may be unable to represent; write UTF-8.
    with open("tweets.txt", "a", encoding="utf-8") as out:
        out.writelines(tweet._json['full_text'] + '\n' for tweet in tweets)
def get_search_query():
    """Read the comma-separated search phrases stored in ``words.txt``.

    Each phrase has surrounding whitespace (including the file's trailing
    newline) removed, and empty fields such as those produced by a trailing
    or doubled comma are dropped, so no blank query is returned.

    Returns:
        A list of non-empty phrase strings in file order; empty if the file
        contains no phrases.
    """
    with open("words.txt", encoding="utf-8") as f:
        raw = f.read()
    stripped = (phrase.strip() for phrase in raw.split(','))
    return [phrase for phrase in stripped if phrase]
def main(filename='tweets.json', max_tweets=5):
    """Search Twitter for every phrase in ``words.txt`` and save the results.

    For each phrase returned by ``get_search_query`` the tweets found by
    ``tweet_search`` are passed to ``write_tweets`` and
    ``write_json_tweets``. After three consecutive phrases with no results
    the run stops: via ``sys.exit`` if the current phrase equals the last
    one in the list, otherwise by breaking out of the loop.

    Args:
        filename: Name handed to ``write_tweets`` and ``write_json_tweets``.
        max_tweets: Maximum number of tweets requested per phrase.
    """
    # NOTE(review): the original referenced an undefined ``filename``
    # (NameError on the first successful search). The default used here is
    # a placeholder - confirm what json_parse.write_json_tweets expects.
    search_phrases = get_search_query()
    # One client is enough for the whole run; no need to rebuild it per phrase.
    api = load_api()
    exitcount = 0  # consecutive phrases that returned no tweets
    for search_phrase in search_phrases:
        print('Search phrase =', search_phrase)
        tweets = tweet_search(api, search_phrase, max_tweets)
        if tweets:
            write_tweets(filename, tweets)
            write_json_tweets(filename, tweets)
            exitcount = 0
            continue
        exitcount += 1
        if exitcount == 3:
            # The original compared against an undefined ``search_phrases``;
            # the list being iterated is the only candidate in scope.
            if search_phrase == search_phrases[-1]:
                sys.exit('Maximum number of empty tweet strings reached - exiting')
            print('Maximum number of empty tweet strings reached - breaking')
            break
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()