""""
Author: Swapnil Shinde
Vscale Consulting LLP
"""
import tweepy
import gspread
import time


class VscaleTwitterBot:
    def __init__(self, gc_credential_file_name, spreadsheet_url,
                 tweepy_consumer_key, tweepy_consumer_secret,
                 tweepy_callback_uri):
        '''
        gc -> gspread client authorised with the given service-account credentials file
        spreadsheet_url -> URL of the Google spreadsheet holding the tweet links
        tweepy_credentials -> dict of tweepy credentials containing consumer_key, consumer_secret_key, callback_url
        '''
        self.tweet_links = []
        self.result_worksheet = None  # result worksheet object
        self.scraped_till = 2  # next spreadsheet row to read (row 1 is assumed to hold the form header)
        self.spreadsheet_url = spreadsheet_url
        self.gc = gspread.service_account(filename=gc_credential_file_name)
        self.tweepy_credentials = {
            'consumer_key': tweepy_consumer_key,
            'consumer_secret_key': tweepy_consumer_secret,
            'callback_url': tweepy_callback_uri
        }

    # returns a tweepy API object
    def get_api(self) -> object:
        '''
        builds an auth handler from the stored tweepy credentials and returns an API object for tweepy API calls
        '''
        consumer_key = self.tweepy_credentials['consumer_key']
        consumer_secret = self.tweepy_credentials['consumer_secret_key']
        callback_uri = self.tweepy_credentials['callback_url']
        auth = tweepy.OAuthHandler(consumer_key, consumer_secret, callback=callback_uri)
        return tweepy.API(auth)
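    # Note (assumption): the handler above is built from the consumer key/secret alone.
    # Depending on how the Twitter app is configured, tweepy may also require a user
    # access token (auth.set_access_token(...)) before calls such as api.get_status are
    # authorised; that token is not part of this file.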

    def get_tweet_info(self, tweet_id) -> tuple:
        '''
        returns a list of scraped fields and the influencer status id for the given tweet status id;
        on error returns an empty list and None
        '''
        try:
            api = self.get_api()
            tweet = api.get_status(tweet_id, tweet_mode='extended')._json
            influencer_status_id = tweet["in_reply_to_status_id_str"]
            tweet_out = [
                f"https://twitter.com/{tweet['user']['screen_name']}",
                f"https://twitter.com/{tweet['user']['screen_name']}/status/{tweet_id}",
                tweet['favorite_count'],
                # with tweet_mode='extended' the payload carries 'full_text' instead of 'text'
                tweet.get('full_text', tweet.get('text', '')),
            ]
            return tweet_out, influencer_status_id
        except Exception:
            # no data available for the current url (deleted tweet, bad id, rate limit, ...)
            return [], None

    def binder(self, link) -> list:
        '''
        binder -> binds the info of both the promoter and the influencer tweet and returns the combined row
        '''
        tweet_id = link.split("/")[-1].split("?")[0]  # extract the tweet id from the link
        promoter_info, influencer_tweet_id = self.get_tweet_info(tweet_id)
        influencer_info, _ = self.get_tweet_info(influencer_tweet_id)
        return promoter_info + influencer_info
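    # Example of the link parsing above (the URL is a hypothetical placeholder):
    #   "https://twitter.com/someuser/status/1234567890123456789?s=20"
    #   .split("/")[-1]  -> "1234567890123456789?s=20"
    #   .split("?")[0]   -> "1234567890123456789"   (the id handed to get_tweet_info)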

    def get_spreadsheet(self):
        '''
        fills the instance variables: tweet_links with the not-yet-scraped spreadsheet links and
        result_worksheet with the second worksheet object.
        scraped_till -> row number of the next link to scrape; used to fetch only the links
        appended after this row (it is advanced row by row in scrape_tweets)
        '''
        gsheet = self.gc.open_by_url(self.spreadsheet_url)
        spreadsheet_response = gsheet.values_get(f"Form Responses 1!A{self.scraped_till}:A")
        # output_sheet_filled_till_response = gsheet.values_get(f"Output!A1:A")
        # if 'values' in output_sheet_filled_till_response:
        #     self.scraped_till += len(output_sheet_filled_till_response['values'])
        #     print(output_sheet_filled_till_response['values'])
        if 'values' in spreadsheet_response:
            # scraped_till is advanced per processed row in scrape_tweets, so it is not bumped here
            self.tweet_links = spreadsheet_response['values']
            self.result_worksheet = gsheet.worksheets()[1]
            return
        return "no values"

    def scrape_tweets(self):
        if self.get_spreadsheet() == "no values":  # fetch sheet data into the instance variables
            print("no new values appended")
            return
        api_call_counts = 0
        try:
            for link in self.tweet_links:
                scraped_values = self.binder(link[0])
                # print(scraped_values)
                self.result_worksheet.append_row(scraped_values)
                self.scraped_till += 1
                time.sleep(1.5)  # throttle the Twitter API calls
                api_call_counts += 1
        except Exception as e:
            print(e, "exception from scrape_tweets method")
        return

    def run_twizzer(self):
        '''
        twizzer -> main method of the instance; keeps the bot polling the spreadsheet indefinitely
        '''
        while True:
            self.scrape_tweets()
            print("scrape cycle finished, sleeping before the next poll")
            time.sleep(5)
import config

if __name__ == "__main__":
    consumer_key = config.consumer_key
    consumer_secret = config.consumer_secret
    callback_uri = config.callback_uri
    spreadsheet_url = config.spredsheet_url
    bot = VscaleTwitterBot("cred.json", spreadsheet_url, consumer_key, consumer_secret, callback_uri)
    bot.run_twizzer()
#end