Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: analytics package total views users graphs #4353 #4360

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion analytics/analytics_package/analytics/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,12 @@
{},
)

sheets_service_params = (
["https://www.googleapis.com/auth/spreadsheets"],
"sheets", "v4",
{}
)

next_port = None
default_service_system = None

Expand Down Expand Up @@ -291,7 +297,6 @@ def build_params(source, subs):


def results_to_df(results):

df = pd.DataFrame()
for result in results:
# Collect column nmes
Expand Down
9 changes: 7 additions & 2 deletions analytics/analytics_package/analytics/charts.py
Original file line number Diff line number Diff line change
Expand Up @@ -416,8 +416,8 @@ def show_plot(df, title, fontsize=16, **other_params):
fig.suptitle(title, fontsize=fontsize)
plt.show()

def show_plot_over_time(titles, xlabels, metrics, dimensions="ga:date", format_table=True, df_filter=None, pre_plot_df_processor=None, **other_params):
titles, xlabels, metrics = strings_to_lists(titles, xlabels, metrics)
def get_df_over_time(xlabels, metrics, dimensions, df_filter=None, **other_params):
xlabels, metrics = strings_to_lists(xlabels, metrics)

df = get_data_df(metrics, dimensions, **other_params)

Expand All @@ -430,6 +430,11 @@ def show_plot_over_time(titles, xlabels, metrics, dimensions="ga:date", format_t
# Rename for display
df.rename(columns={name: xlabels[i] for i, name in enumerate(df.columns)}, inplace=True)

return df

def show_plot_over_time(titles, xlabels, metrics, dimensions="ga:date", format_table=True, df_filter=None, pre_plot_df_processor=None, **other_params):
df = get_df_over_time(xlabels, metrics, dimensions, df_filter=df_filter, **other_params)

if (not pre_plot_df_processor is None):
df = pre_plot_df_processor(df)

Expand Down
226 changes: 202 additions & 24 deletions analytics/analytics_package/analytics/sheets_api.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from dataclasses import dataclass
import typing
import gspread
import gspread_formatting
from enum import Enum
from googleapiclient.discovery import build
import numpy as np
import pandas as pd

FONT_SIZE_PTS = 10
PTS_PIXELS_RATIO = 4/3
Expand All @@ -16,20 +17,33 @@ class FILE_OVERRIDE_BEHAVIORS(Enum):
EXIT_IF_IN_SAME_PLACE = 2
EXIT_ANYWHERE = 3


class WORKSHEET_OVERRIDE_BEHAVIORS(Enum):
OVERRIDE = 1
EXIT = 2


class COLUMN_FORMAT_OPTIONS(Enum):
DEFAULT = 1
PERCENT_UNCOLORED = 2
PERCENT_COLORED = 3
YEAR_MONTH_DATE = 4


class CHART_TYPES(Enum):
LINE = "LINE"

DEFAULT_SHEET_FORMATTING_OPTIONS = {
"bold_header": True,
"center_header": True,
"freeze_header": True,
"column_widths": {"justify": True, "buffer_chars": DEFAULT_BUFFER_CHARS}
"column_widths": {"justify": True, "buffer_chars": DEFAULT_BUFFER_CHARS},
"extra_columns": 0,
"extra_columns_width": 50,
}

DEFAULT_GSPREAD_UPDATE_ARGS = {
"value_input_option": gspread.utils.ValueInputOption.user_entered,
}

def extract_credentials(authentication_response):
Expand All @@ -41,7 +55,7 @@ def authenticate_gspread(authentication_response):
gc = gspread.authorize(extract_credentials(authentication_response))
return gc

def authenticate_drive_api(authentication_response):
def authenticate_google_api(authentication_response):
"""Authenticates the Drive API using the response from api.authenticate"""
return authentication_response[0]

Expand Down Expand Up @@ -107,21 +121,21 @@ def search_for_folder_id(drive_api, folder_name, allow_trashed = False, allow_du
return [file["id"] for file in files_exact_match]


def create_sheet_in_folder(authentication_response, sheet_name, parent_folder_name=None, override_behavior=FILE_OVERRIDE_BEHAVIORS.EXIT_ANYWHERE):
def create_sheet_in_folder(drive_authentication_response, sheet_name, parent_folder_name=None, override_behavior=FILE_OVERRIDE_BEHAVIORS.EXIT_ANYWHERE):
"""
Create a new sheet in the project with the given name and parent folder.
Returns the new sheet.

:param authentication_response: the service parameters tuple
:param drive_authentication_response: the service parameters tuple
:param sheet_name: the name of the new sheet
:param parent_folder_name: the name of the parent folder for the new sheet
:param override_behavior: the behavior to take if the sheet already exists
:returns: the gspread.Spreadsheet object of the new sheet
:rtype: gspread.Spreadsheet
"""
# Build Drive API
gc = authenticate_gspread(authentication_response)
drive_api = authenticate_drive_api(authentication_response)
gc = authenticate_gspread(drive_authentication_response)
drive_api = authenticate_google_api(drive_authentication_response)
parent_folder_id = None if parent_folder_name is None else search_for_folder_id(drive_api, parent_folder_name)[0]

# Check if sheet already exists and handle based on input
Expand Down Expand Up @@ -163,8 +177,9 @@ def fill_worksheet_with_df(
df,
worksheet_name,
overlapBehavior,
sheet_formatting_options=DEFAULT_SHEET_FORMATTING_OPTIONS,
column_formatting_options={}
sheet_formatting_options={},
column_formatting_options={},
**gspread_update_args
):
"""
Fill a worksheet with the contents of a DataFrame.
Expand Down Expand Up @@ -193,38 +208,50 @@ def fill_worksheet_with_df(
title=worksheet_name, rows=df.shape[0], cols=df.shape[1]
)

sheet_formatting_options_filled = {**DEFAULT_SHEET_FORMATTING_OPTIONS, **sheet_formatting_options}

# Add extra blank columns to the right of the worksheet
df_to_insert = pd.concat(
[df] + [pd.Series(" ", index=df.index, name="")] * sheet_formatting_options_filled["extra_columns"],
axis=1
)
# Add data to worksheet
worksheet.update([df.columns.values.tolist()] + df.fillna("NA").values.tolist())
worksheet.update(
[df_to_insert.columns.values.tolist()] + df_to_insert.fillna("NA").values.tolist(),
**{**DEFAULT_GSPREAD_UPDATE_ARGS, **gspread_update_args}
)

# Format worksheet
# Justify Column Widths
if "column_widths" not in sheet_formatting_options or sheet_formatting_options["column_widths"]["justify"]:
if "column_widths" not in sheet_formatting_options_filled or sheet_formatting_options_filled["column_widths"]["justify"]:
text_widths = df.astype(str).columns.map(
lambda column_name: df[column_name].astype(str).str.len().max()
)
header_widths = df.columns.str.len()
buffer_chars = (
DEFAULT_BUFFER_CHARS
if ("column_widths" not in sheet_formatting_options or "buffer_chars" not in sheet_formatting_options["column_widths"])
else sheet_formatting_options["column_widths"]["buffer_chars"]
if ("column_widths" not in sheet_formatting_options_filled or "buffer_chars" not in sheet_formatting_options_filled["column_widths"])
else sheet_formatting_options_filled["column_widths"]["buffer_chars"]
)
column_widths = [
data_column_widths = [
round((max(len_tuple) + buffer_chars) * FONT_SIZE_PTS * 1/PTS_PIXELS_RATIO)
for len_tuple in zip(text_widths, header_widths)
]
extra_column_widths = [sheet_formatting_options_filled["extra_columns_width"]] * sheet_formatting_options_filled["extra_columns"]
combined_column_widths = data_column_widths + extra_column_widths
column_positions = [
gspread.utils.rowcol_to_a1(1, i + 1)[0] for i, _ in enumerate(column_widths)
gspread.utils.rowcol_to_a1(1, i + 1)[0] for i, _ in enumerate(combined_column_widths)
]
gspread_formatting.set_column_widths(worksheet, zip(column_positions, column_widths))
gspread_formatting.set_column_widths(worksheet, zip(column_positions, combined_column_widths))
# Freeze Header
if "freeze_header" not in sheet_formatting_options or sheet_formatting_options["freeze_header"]:
if "freeze_header" not in sheet_formatting_options_filled or sheet_formatting_options_filled["freeze_header"]:
gspread_formatting.set_frozen(worksheet, rows=1)
base_format_options = gspread_formatting.CellFormat()
# Bold Header
if "bold_header" not in sheet_formatting_options or sheet_formatting_options["bold_header"]:
if "bold_header" not in sheet_formatting_options_filled or sheet_formatting_options_filled["bold_header"]:
base_format_options += gspread_formatting.CellFormat(textFormat=gspread_formatting.TextFormat(bold=True))
# Center Header
if "center_header" not in sheet_formatting_options or sheet_formatting_options["center_header"]:
if "center_header" not in sheet_formatting_options_filled or sheet_formatting_options_filled["center_header"]:
base_format_options += gspread_formatting.CellFormat(horizontalAlignment="CENTER")
# Handle column specific formatting
for column in column_formatting_options:
Expand Down Expand Up @@ -269,6 +296,13 @@ def fill_worksheet_with_df(
column_range,
gspread_formatting.CellFormat(numberFormat=gspread_formatting.NumberFormat(type='PERCENT', pattern='0.0%'))
)
if column_formatting_options[column] == COLUMN_FORMAT_OPTIONS.YEAR_MONTH_DATE:
# Apply date format rule
gspread_formatting.format_cell_range(
worksheet,
column_range,
gspread_formatting.CellFormat(numberFormat=gspread_formatting.NumberFormat(type='DATE', pattern='yyyy-mm'))
)

# Apply base formatting options
gspread_formatting.format_cell_range(
Expand All @@ -281,7 +315,7 @@ def fill_worksheet_with_df(
if "Sheet1" in [i.title for i in sheet.worksheets()]:
sheet.del_worksheet(sheet.worksheet("Sheet1"))

def fill_spreadsheet_with_df_dict(sheet, df_dict, overlapBehavior, sheet_formatting_options={}, column_formatting_options={}):
def fill_spreadsheet_with_df_dict(sheet, df_dict, overlapBehavior, sheet_formatting_options={}, column_formatting_options={}, **gspread_update_args):
"""
Fill a sheet with the contents of a dictionary of DataFrames.
The keys of the dictionary are the names of the worksheets, and the values contain the data to be placed in the sheet.
Expand All @@ -307,6 +341,150 @@ def fill_spreadsheet_with_df_dict(sheet, df_dict, overlapBehavior, sheet_formatt
for worksheet_name, df in df_dict.items():
fill_worksheet_with_df(
sheet, df, worksheet_name, overlapBehavior,
sheet_formatting_options=sheet_formatting_options.get(worksheet_name, DEFAULT_SHEET_FORMATTING_OPTIONS),
column_formatting_options=column_formatting_options.get(worksheet_name, {})
)
sheet_formatting_options=sheet_formatting_options.get(worksheet_name, {}),
column_formatting_options=column_formatting_options.get(worksheet_name, {}),
**gspread_update_args
)

def update_sheet_raw(sheets_authentication_response, sheet, *updates):
"""
Directly call the Google Sheets api to update the specified sheet with the optional arguments.
"""
assert len(updates) > 0
sheets_api = authenticate_google_api(sheets_authentication_response)
sheet_id = sheet.id
body = {"requests": list(updates)}
response = (
sheets_api.spreadsheets()
.batchUpdate(spreadsheetId=sheet_id, body=body)
.execute()
)
return response

REQUIRED_CHART_ARGS = []

DEFAULT_CHART_ARGS = {
"title": "",
"x_axis_title": "",
"y_axis_title": "",
"invert_x_axis": False,
"chart_position": None, # None means it will be created in a new sheet
"chart_position_offset_x": 0,
"chart_position_offset_y": 0,
"chart_width": 600,
"chart_height": 371,
}

@dataclass
class WorksheetRange:
"""
A dataclass to represent a range of cells in a worksheet in the one-sided interval [top_left, bottom_right).
:param worksheet: the gspread.worksheet.Worksheet object
:param top_left: the top left cell of the range. This cell will be included in the range
:param bottom_right: the bottom right cell of the range. This cell will not be included in the range
"""
worksheet: gspread.worksheet.Worksheet
top_left: gspread.cell.Cell
bottom_right: gspread.cell.Cell

@property
def range_dict(self):
"""The range as a dictionary for the sources field in the Google Sheets api"""
return {
"sheetId": self.worksheet.id,
"startRowIndex": self.top_left.row - 1,
"endRowIndex": self.bottom_right.row - 1,
"startColumnIndex": self.top_left.col - 1,
"endColumnIndex": self.bottom_right.col - 1,
}

def _cell_to_grid_coordinate(cell, worksheet):
return {
"sheetId": worksheet.id,
"rowIndex": cell.row - 1,
"columnIndex": cell.col - 1,
}

def add_chart_to_sheet(sheets_authentication_response, sheet, worksheet, chart_type, domain, series, **chart_args):
"""
Add a chart to a specified workshet
:param sheets_authentication_response: the response from ga.authenticate. Must be for the sheets api v4
:param sheet: the gspread.Spreadsheet object
:param worksheet: the gspread.Worksheet object
:param chart_type: the type of chart to add
:param domain: the domain of the chart as a WorksheetRange. Must contain either one row or one column
:param series: the series of the chart as a WorksheetRange. Must contain either one row or one column
:param chart_args: other arguments to create the chart. See DEFAULT_CHART_ARGS
"""
complete_chart_args = {**DEFAULT_CHART_ARGS, **chart_args}
if complete_chart_args["chart_position"] is not None:
position_dict = {
"overlayPosition": {
"anchorCell": _cell_to_grid_coordinate(complete_chart_args["chart_position"], worksheet),
"offsetXPixels": complete_chart_args["chart_position_offset_x"],
"offsetYPixels": complete_chart_args["chart_position_offset_y"],
"widthPixels": complete_chart_args["chart_width"],
"heightPixels": complete_chart_args["chart_height"],
}
}
else:
position_dict = {"newSheet": True}
formatted_domains = [
{
"domain": {
"sourceRange": {
"sources": [
domain.range_dict
],
},
},
"reversed": complete_chart_args["invert_x_axis"],
},
]

formatted_series = [
{
"series": {
"sourceRange": {
"sources": [
series_source.range_dict
],
},
},
"targetAxis": "LEFT_AXIS",
}
for series_source in series
]
formatted_axis = []
if complete_chart_args["x_axis_title"]:
formatted_axis.append({
"title": complete_chart_args["x_axis_title"],
"position": "BOTTOM_AXIS",
})
if complete_chart_args["y_axis_title"]:
formatted_axis.append({
"title": complete_chart_args["y_axis_title"],
"position": "LEFT_AXIS",
})
request = {
"addChart": {
"chart": {
"spec": {
"title": complete_chart_args["title"],
#TODO: insert legend position
#TODO: insert axis positions
"basicChart": {
"axis": formatted_axis,
"chartType": chart_type.value,
"domains": formatted_domains,
"headerCount": 1, #TODO: not sure what this means
"series": formatted_series,
},
},
"position": position_dict
},
},
}

response = update_sheet_raw(sheets_authentication_response, sheet, request)
return response
Loading
Loading