Skip to content

Commit 8845dba

Browse files
author
Jonah Paten
authored
feat: analytics package total views users graphs #4353 (#4360)
* feat: basic functions to add charts to google sheets in analytics api (#4353) * feat: added change over time sheets to analytics package (#4353) * feat: added datetime handling to analytics sheets package (#4353) * chore: added missing docstrings in analytics package (#4353) * chore: bumped setup.py (#4353)
1 parent 60a7a6e commit 8845dba

File tree

5 files changed

+291
-30
lines changed

5 files changed

+291
-30
lines changed

analytics/analytics_package/analytics/api.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,12 @@
3030
{},
3131
)
3232

33+
sheets_service_params = (
34+
["https://www.googleapis.com/auth/spreadsheets"],
35+
"sheets", "v4",
36+
{}
37+
)
38+
3339
next_port = None
3440
default_service_system = None
3541

@@ -291,7 +297,6 @@ def build_params(source, subs):
291297

292298

293299
def results_to_df(results):
294-
295300
df = pd.DataFrame()
296301
for result in results:
297302
# Collect column names

analytics/analytics_package/analytics/charts.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -416,8 +416,8 @@ def show_plot(df, title, fontsize=16, **other_params):
416416
fig.suptitle(title, fontsize=fontsize)
417417
plt.show()
418418

419-
def show_plot_over_time(titles, xlabels, metrics, dimensions="ga:date", format_table=True, df_filter=None, pre_plot_df_processor=None, **other_params):
420-
titles, xlabels, metrics = strings_to_lists(titles, xlabels, metrics)
419+
def get_df_over_time(xlabels, metrics, dimensions, df_filter=None, **other_params):
420+
xlabels, metrics = strings_to_lists(xlabels, metrics)
421421

422422
df = get_data_df(metrics, dimensions, **other_params)
423423

@@ -430,6 +430,11 @@ def show_plot_over_time(titles, xlabels, metrics, dimensions="ga:date", format_t
430430
# Rename for display
431431
df.rename(columns={name: xlabels[i] for i, name in enumerate(df.columns)}, inplace=True)
432432

433+
return df
434+
435+
def show_plot_over_time(titles, xlabels, metrics, dimensions="ga:date", format_table=True, df_filter=None, pre_plot_df_processor=None, **other_params):
436+
df = get_df_over_time(xlabels, metrics, dimensions, df_filter=df_filter, **other_params)
437+
433438
if (not pre_plot_df_processor is None):
434439
df = pre_plot_df_processor(df)
435440

analytics/analytics_package/analytics/sheets_api.py

+202-24
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
1+
from dataclasses import dataclass
2+
import typing
13
import gspread
24
import gspread_formatting
35
from enum import Enum
4-
from googleapiclient.discovery import build
5-
import numpy as np
6+
import pandas as pd
67

78
FONT_SIZE_PTS = 10
89
PTS_PIXELS_RATIO = 4/3
@@ -16,20 +17,33 @@ class FILE_OVERRIDE_BEHAVIORS(Enum):
1617
EXIT_IF_IN_SAME_PLACE = 2
1718
EXIT_ANYWHERE = 3
1819

20+
1921
class WORKSHEET_OVERRIDE_BEHAVIORS(Enum):
2022
OVERRIDE = 1
2123
EXIT = 2
2224

25+
2326
class COLUMN_FORMAT_OPTIONS(Enum):
    """Per-column formats that fill_worksheet_with_df looks up in its column_formatting_options argument."""
    DEFAULT = 1
    PERCENT_UNCOLORED = 2
    PERCENT_COLORED = 3
    # Formats the column as a DATE number format with the pattern 'yyyy-mm'
    YEAR_MONTH_DATE = 4
31+
32+
33+
class CHART_TYPES(Enum):
    """Chart types accepted by add_chart_to_sheet; the member value is sent as the chart's "chartType"."""
    LINE = "LINE"
2735

2836
DEFAULT_SHEET_FORMATTING_OPTIONS = {
2937
"bold_header": True,
3038
"center_header": True,
3139
"freeze_header": True,
32-
"column_widths": {"justify": True, "buffer_chars": DEFAULT_BUFFER_CHARS}
40+
"column_widths": {"justify": True, "buffer_chars": DEFAULT_BUFFER_CHARS},
41+
"extra_columns": 0,
42+
"extra_columns_width": 50,
43+
}
44+
45+
DEFAULT_GSPREAD_UPDATE_ARGS = {
46+
"value_input_option": gspread.utils.ValueInputOption.user_entered,
3347
}
3448

3549
def extract_credentials(authentication_response):
@@ -41,7 +55,7 @@ def authenticate_gspread(authentication_response):
4155
gc = gspread.authorize(extract_credentials(authentication_response))
4256
return gc
4357

44-
def authenticate_drive_api(authentication_response):
58+
def authenticate_google_api(authentication_response):
4559
"""Authenticates the Drive API using the response from api.authenticate"""
4660
return authentication_response[0]
4761

@@ -107,21 +121,21 @@ def search_for_folder_id(drive_api, folder_name, allow_trashed = False, allow_du
107121
return [file["id"] for file in files_exact_match]
108122

109123

110-
def create_sheet_in_folder(authentication_response, sheet_name, parent_folder_name=None, override_behavior=FILE_OVERRIDE_BEHAVIORS.EXIT_ANYWHERE):
124+
def create_sheet_in_folder(drive_authentication_response, sheet_name, parent_folder_name=None, override_behavior=FILE_OVERRIDE_BEHAVIORS.EXIT_ANYWHERE):
111125
"""
112126
Create a new sheet in the project with the given name and parent folder.
113127
Returns the new sheet.
114128
115-
:param authentication_response: the service parameters tuple
129+
:param drive_authentication_response: the service parameters tuple
116130
:param sheet_name: the name of the new sheet
117131
:param parent_folder_name: the name of the parent folder for the new sheet
118132
:param override_behavior: the behavior to take if the sheet already exists
119133
:returns: the gspread.Spreadsheet object of the new sheet
120134
:rtype: gspread.Spreadsheet
121135
"""
122136
# Build Drive API
123-
gc = authenticate_gspread(authentication_response)
124-
drive_api = authenticate_drive_api(authentication_response)
137+
gc = authenticate_gspread(drive_authentication_response)
138+
drive_api = authenticate_google_api(drive_authentication_response)
125139
parent_folder_id = None if parent_folder_name is None else search_for_folder_id(drive_api, parent_folder_name)[0]
126140

127141
# Check if sheet already exists and handle based on input
@@ -163,8 +177,9 @@ def fill_worksheet_with_df(
163177
df,
164178
worksheet_name,
165179
overlapBehavior,
166-
sheet_formatting_options=DEFAULT_SHEET_FORMATTING_OPTIONS,
167-
column_formatting_options={}
180+
sheet_formatting_options={},
181+
column_formatting_options={},
182+
**gspread_update_args
168183
):
169184
"""
170185
Fill a worksheet with the contents of a DataFrame.
@@ -193,38 +208,50 @@ def fill_worksheet_with_df(
193208
title=worksheet_name, rows=df.shape[0], cols=df.shape[1]
194209
)
195210

211+
sheet_formatting_options_filled = {**DEFAULT_SHEET_FORMATTING_OPTIONS, **sheet_formatting_options}
212+
213+
# Add extra blank columns to the right of the worksheet
214+
df_to_insert = pd.concat(
215+
[df] + [pd.Series(" ", index=df.index, name="")] * sheet_formatting_options_filled["extra_columns"],
216+
axis=1
217+
)
196218
# Add data to worksheet
197-
worksheet.update([df.columns.values.tolist()] + df.fillna("NA").values.tolist())
219+
worksheet.update(
220+
[df_to_insert.columns.values.tolist()] + df_to_insert.fillna("NA").values.tolist(),
221+
**{**DEFAULT_GSPREAD_UPDATE_ARGS, **gspread_update_args}
222+
)
198223

199224
# Format worksheet
200225
# Justify Column Widths
201-
if "column_widths" not in sheet_formatting_options or sheet_formatting_options["column_widths"]["justify"]:
226+
if "column_widths" not in sheet_formatting_options_filled or sheet_formatting_options_filled["column_widths"]["justify"]:
202227
text_widths = df.astype(str).columns.map(
203228
lambda column_name: df[column_name].astype(str).str.len().max()
204229
)
205230
header_widths = df.columns.str.len()
206231
buffer_chars = (
207232
DEFAULT_BUFFER_CHARS
208-
if ("column_widths" not in sheet_formatting_options or "buffer_chars" not in sheet_formatting_options["column_widths"])
209-
else sheet_formatting_options["column_widths"]["buffer_chars"]
233+
if ("column_widths" not in sheet_formatting_options_filled or "buffer_chars" not in sheet_formatting_options_filled["column_widths"])
234+
else sheet_formatting_options_filled["column_widths"]["buffer_chars"]
210235
)
211-
column_widths = [
236+
data_column_widths = [
212237
round((max(len_tuple) + buffer_chars) * FONT_SIZE_PTS * 1/PTS_PIXELS_RATIO)
213238
for len_tuple in zip(text_widths, header_widths)
214239
]
240+
extra_column_widths = [sheet_formatting_options_filled["extra_columns_width"]] * sheet_formatting_options_filled["extra_columns"]
241+
combined_column_widths = data_column_widths + extra_column_widths
215242
column_positions = [
216-
gspread.utils.rowcol_to_a1(1, i + 1)[0] for i, _ in enumerate(column_widths)
243+
gspread.utils.rowcol_to_a1(1, i + 1)[0] for i, _ in enumerate(combined_column_widths)
217244
]
218-
gspread_formatting.set_column_widths(worksheet, zip(column_positions, column_widths))
245+
gspread_formatting.set_column_widths(worksheet, zip(column_positions, combined_column_widths))
219246
# Freeze Header
220-
if "freeze_header" not in sheet_formatting_options or sheet_formatting_options["freeze_header"]:
247+
if "freeze_header" not in sheet_formatting_options_filled or sheet_formatting_options_filled["freeze_header"]:
221248
gspread_formatting.set_frozen(worksheet, rows=1)
222249
base_format_options = gspread_formatting.CellFormat()
223250
# Bold Header
224-
if "bold_header" not in sheet_formatting_options or sheet_formatting_options["bold_header"]:
251+
if "bold_header" not in sheet_formatting_options_filled or sheet_formatting_options_filled["bold_header"]:
225252
base_format_options += gspread_formatting.CellFormat(textFormat=gspread_formatting.TextFormat(bold=True))
226253
# Center Header
227-
if "center_header" not in sheet_formatting_options or sheet_formatting_options["center_header"]:
254+
if "center_header" not in sheet_formatting_options_filled or sheet_formatting_options_filled["center_header"]:
228255
base_format_options += gspread_formatting.CellFormat(horizontalAlignment="CENTER")
229256
# Handle column specific formatting
230257
for column in column_formatting_options:
@@ -269,6 +296,13 @@ def fill_worksheet_with_df(
269296
column_range,
270297
gspread_formatting.CellFormat(numberFormat=gspread_formatting.NumberFormat(type='PERCENT', pattern='0.0%'))
271298
)
299+
if column_formatting_options[column] == COLUMN_FORMAT_OPTIONS.YEAR_MONTH_DATE:
300+
# Apply date format rule
301+
gspread_formatting.format_cell_range(
302+
worksheet,
303+
column_range,
304+
gspread_formatting.CellFormat(numberFormat=gspread_formatting.NumberFormat(type='DATE', pattern='yyyy-mm'))
305+
)
272306

273307
# Apply base formatting options
274308
gspread_formatting.format_cell_range(
@@ -281,7 +315,7 @@ def fill_worksheet_with_df(
281315
if "Sheet1" in [i.title for i in sheet.worksheets()]:
282316
sheet.del_worksheet(sheet.worksheet("Sheet1"))
283317

284-
def fill_spreadsheet_with_df_dict(sheet, df_dict, overlapBehavior, sheet_formatting_options={}, column_formatting_options={}):
318+
def fill_spreadsheet_with_df_dict(sheet, df_dict, overlapBehavior, sheet_formatting_options={}, column_formatting_options={}, **gspread_update_args):
285319
"""
286320
Fill a sheet with the contents of a dictionary of DataFrames.
287321
The keys of the dictionary are the names of the worksheets, and the values contain the data to be placed in the sheet.
@@ -307,6 +341,150 @@ def fill_spreadsheet_with_df_dict(sheet, df_dict, overlapBehavior, sheet_formatt
307341
for worksheet_name, df in df_dict.items():
308342
fill_worksheet_with_df(
309343
sheet, df, worksheet_name, overlapBehavior,
310-
sheet_formatting_options=sheet_formatting_options.get(worksheet_name, DEFAULT_SHEET_FORMATTING_OPTIONS),
311-
column_formatting_options=column_formatting_options.get(worksheet_name, {})
312-
)
344+
sheet_formatting_options=sheet_formatting_options.get(worksheet_name, {}),
345+
column_formatting_options=column_formatting_options.get(worksheet_name, {}),
346+
**gspread_update_args
347+
)
348+
349+
def update_sheet_raw(sheets_authentication_response, sheet, *updates):
    """
    Directly call the Google Sheets api to update the specified sheet with the given requests.

    :param sheets_authentication_response: the authentication response for the sheets api,
        as accepted by authenticate_google_api
    :param sheet: the spreadsheet to update. Its ``id`` attribute is sent as the spreadsheetId
    :param updates: one or more request dictionaries, sent together in a single batchUpdate call
    :returns: the response of the batchUpdate call
    :raises ValueError: if no update requests are given
    """
    # Validate explicitly: an assert would be stripped when Python runs with -O
    if not updates:
        raise ValueError("At least one update request is required")
    sheets_api = authenticate_google_api(sheets_authentication_response)
    body = {"requests": list(updates)}
    return (
        sheets_api.spreadsheets()
        .batchUpdate(spreadsheetId=sheet.id, body=body)
        .execute()
    )
363+
364+
# NOTE(review): presumably the chart argument names a caller must supply; it is empty and
# not referenced by any code visible here - confirm whether it is still needed.
REQUIRED_CHART_ARGS = []

# Default values for the keyword arguments of add_chart_to_sheet.
# Arguments supplied by the caller override these key by key.
DEFAULT_CHART_ARGS = {
    "title": "",  # sent as the chart title
    "x_axis_title": "",  # when empty, no BOTTOM_AXIS entry is sent
    "y_axis_title": "",  # when empty, no LEFT_AXIS entry is sent
    "invert_x_axis": False,  # sent as the "reversed" flag of the chart domain
    "chart_position": None, # None means it will be created in a new sheet
    "chart_position_offset_x": 0,  # sent as "offsetXPixels" of the overlay position
    "chart_position_offset_y": 0,  # sent as "offsetYPixels" of the overlay position
    "chart_width": 600,  # sent as "widthPixels" of the overlay position
    "chart_height": 371,  # sent as "heightPixels" of the overlay position
}
377+
378+
@dataclass
379+
class WorksheetRange:
380+
"""
381+
A dataclass to represent a range of cells in a worksheet in the one-sided interval [top_left, bottom_right).
382+
:param worksheet: the gspread.worksheet.Worksheet object
383+
:param top_left: the top left cell of the range. This cell will be included in the range
384+
:param bottom_right: the bottom right cell of the range. This cell will not be included in the range
385+
"""
386+
worksheet: gspread.worksheet.Worksheet
387+
top_left: gspread.cell.Cell
388+
bottom_right: gspread.cell.Cell
389+
390+
@property
391+
def range_dict(self):
392+
"""The range as a dictionary for the sources field in the Google Sheets api"""
393+
return {
394+
"sheetId": self.worksheet.id,
395+
"startRowIndex": self.top_left.row - 1,
396+
"endRowIndex": self.bottom_right.row - 1,
397+
"startColumnIndex": self.top_left.col - 1,
398+
"endColumnIndex": self.bottom_right.col - 1,
399+
}
400+
401+
def _cell_to_grid_coordinate(cell, worksheet):
402+
return {
403+
"sheetId": worksheet.id,
404+
"rowIndex": cell.row - 1,
405+
"columnIndex": cell.col - 1,
406+
}
407+
408+
def add_chart_to_sheet(sheets_authentication_response, sheet, worksheet, chart_type, domain, series, **chart_args):
    """
    Add a chart to a specified worksheet

    :param sheets_authentication_response: the response from ga.authenticate. Must be for the sheets api v4
    :param sheet: the gspread.Spreadsheet object
    :param worksheet: the gspread.Worksheet object
    :param chart_type: the type of chart to add, as an enum member whose value names the chart type (see CHART_TYPES)
    :param domain: the domain of the chart as a WorksheetRange. Must contain either one row or one column
    :param series: the series of the chart, as either a single WorksheetRange or an iterable of WorksheetRange
        objects. Each must contain either one row or one column
    :param chart_args: other arguments to create the chart. See DEFAULT_CHART_ARGS
    :returns: the response from the Google Sheets api batchUpdate call
    """
    complete_chart_args = {**DEFAULT_CHART_ARGS, **chart_args}

    # The series were documented as a single WorksheetRange but consumed as a collection.
    # Wrap a bare range (anything exposing range_dict) so both call styles work.
    if hasattr(series, "range_dict"):
        series = [series]

    if complete_chart_args["chart_position"] is not None:
        # Overlay the chart on the worksheet, anchored at the requested cell
        position_dict = {
            "overlayPosition": {
                "anchorCell": _cell_to_grid_coordinate(complete_chart_args["chart_position"], worksheet),
                "offsetXPixels": complete_chart_args["chart_position_offset_x"],
                "offsetYPixels": complete_chart_args["chart_position_offset_y"],
                "widthPixels": complete_chart_args["chart_width"],
                "heightPixels": complete_chart_args["chart_height"],
            }
        }
    else:
        # No anchor cell given, so the chart is placed in a new sheet
        position_dict = {"newSheet": True}

    formatted_domains = [
        {
            "domain": {
                "sourceRange": {
                    "sources": [
                        domain.range_dict
                    ],
                },
            },
            "reversed": complete_chart_args["invert_x_axis"],
        },
    ]

    # Every series is plotted against the left axis
    formatted_series = [
        {
            "series": {
                "sourceRange": {
                    "sources": [
                        series_source.range_dict
                    ],
                },
            },
            "targetAxis": "LEFT_AXIS",
        }
        for series_source in series
    ]

    # Axis entries are only sent for axes that were given a title
    formatted_axis = []
    if complete_chart_args["x_axis_title"]:
        formatted_axis.append({
            "title": complete_chart_args["x_axis_title"],
            "position": "BOTTOM_AXIS",
        })
    if complete_chart_args["y_axis_title"]:
        formatted_axis.append({
            "title": complete_chart_args["y_axis_title"],
            "position": "LEFT_AXIS",
        })

    request = {
        "addChart": {
            "chart": {
                "spec": {
                    "title": complete_chart_args["title"],
                    #TODO: insert legend position
                    #TODO: insert axis positions
                    "basicChart": {
                        "axis": formatted_axis,
                        "chartType": chart_type.value,
                        "domains": formatted_domains,
                        # Number of rows or columns in the source data that are headers rather than data
                        "headerCount": 1,
                        "series": formatted_series,
                    },
                },
                "position": position_dict
            },
        },
    }

    return update_sheet_raw(sheets_authentication_response, sheet, request)

0 commit comments

Comments
 (0)