-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathget_search_rate.py
107 lines (91 loc) · 3.6 KB
/
get_search_rate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
from datalab_api import DatalabSearchAPI
import reusable_funcs
import custom_exceptions
from tabulate import tabulate
import json, time, os
from tqdm import tqdm
import pandas as pd
import numpy as np
@reusable_funcs.ReAttemptUntilFailure(max_attempt=3, time=10)
def GetResponse(datalab_api, keywords: list, startDate: str, endDate: str) -> dict:
keywords_groups = datalab_api.CreateKeywordsGroups(keywords)
body = datalab_api.CreateBody(startDate=startDate, endDate=endDate, keywords_groups=keywords_groups)
response = datalab_api.GetResponse(body)
rescode = response.getcode()
if rescode != 200:
raise custom_exceptions.ResponseError(rescode)
response_body = response.read()
jobj = json.loads(response_body)
return jobj
def GetDaysList(startDate, endDate) -> list:
days_list = []
while startDate != endDate:
days_list.append(startDate.strftime('%Y-%m-%d'))
startDate += pd.to_timedelta(1, 'D')
days_list.append(endDate)
return days_list
def CreateDateColumns(days_list, table):
for i in range(30):
table[days_list[i]] = []
def AppendRatio(days_list: list, response, table):
for i in range(5):
#For each keywords
table["Keywords"].append(response["results"][i]["title"])
for day in range(30):
#For each day of each keyword
try:
ratio = response["results"][i]["data"][day]["ratio"]
table[days_list[day]].append(ratio)
except IndexError:
print("!ERROR: INDEX ERROR, APPENDING NaN")
print(response["results"][i]["title"])
for i in range(day, 30):
table[days_list[i]].append(np.nan)
break
def main():
#1. Initializing values
datalab_api = DatalabSearchAPI()
file_name = os.environ.get('FILE_NAME', './data/test.csv')
table = pd.read_csv(file_name, encoding="euc-kr")
keywords = list(table["Keywords"])
#2. Getting the endDate(yesterday) and startDate(1 month ago)
if "2" in file_name:
endDate = reusable_funcs.GetEndDate()
endDate -= pd.to_timedelta(1, 'D') #SUBTRACTS AN EXTRA DAY WHEN WORKING WITH THE SECOND HALF OF THE TABLE, TO MAKE THE DATES THE SAME.
else:
endDate = reusable_funcs.GetEndDate()
startDate = reusable_funcs.GetStartDate(endDate, 'm')
days_list = GetDaysList(startDate, endDate)
endDate = endDate.strftime('%Y-%m-%d')
startDate = startDate.strftime('%Y-%m-%d')
#3. Initializing new data
new_data = {
"Keywords":[]
}
'''
going to look something like...
{
"2023-11-20" : [24,42,12,1,23,.......] (len = 60000)
"2023-11-21" : [24,42,12,1,23,.......] (len = 60000)
.
.
(len = 30)
}
'''
CreateDateColumns(days_list, table=new_data)#INITIALIZING THE COLUMNS AS THE DATES IN THE FIRST LOOP
#4. Completing the new data table
for i in tqdm(range(5, len(keywords)+1, 5)):
temp_keywords = keywords[i-5:i]
response = GetResponse(datalab_api, temp_keywords, startDate, endDate)
AppendRatio(days_list, response, new_data)
# df_new_data = pd.DataFrame(new_data)
# debug_data = df_new_data[["Keywords", "2023-12-10"]]
# print(tabulate(debug_data, headers='keys', tablefmt='psql'))
del new_data["Keywords"]
#5. Appending the new data to the old table
for key, value in new_data.items():
table[str(key)] = value
save_as = os.environ.get('SAVE_AS', './data/test_search_ratio.csv')
table.to_csv(save_as, encoding="euc-kr", index=False)
if __name__ == "__main__":
main()