From c124ac94b14fef3af20955dcf20796d7c1bc121c Mon Sep 17 00:00:00 2001 From: jpaten Date: Mon, 27 Jan 2025 16:47:43 -0800 Subject: [PATCH 1/5] feat: basic functions to add charts to google sheets in analytics api (#4353) --- analytics/analytics_package/analytics/api.py | 8 +- .../analytics_package/analytics/sheets_api.py | 142 +++++++++++++++++- 2 files changed, 141 insertions(+), 9 deletions(-) diff --git a/analytics/analytics_package/analytics/api.py b/analytics/analytics_package/analytics/api.py index a358da933..378fc6174 100644 --- a/analytics/analytics_package/analytics/api.py +++ b/analytics/analytics_package/analytics/api.py @@ -30,6 +30,12 @@ {}, ) +sheets_service_params = ( + ["https://www.googleapis.com/auth/spreadsheets"], + "sheets", "v4", + {} +) + next_port = None default_service_system = None @@ -291,7 +297,7 @@ def build_params(source, subs): def results_to_df(results): - + df = pd.DataFrame() for result in results: # Collect column nmes diff --git a/analytics/analytics_package/analytics/sheets_api.py b/analytics/analytics_package/analytics/sheets_api.py index dffca9701..407f8fe06 100644 --- a/analytics/analytics_package/analytics/sheets_api.py +++ b/analytics/analytics_package/analytics/sheets_api.py @@ -1,8 +1,8 @@ +from dataclasses import dataclass +import typing import gspread import gspread_formatting from enum import Enum -from googleapiclient.discovery import build -import numpy as np FONT_SIZE_PTS = 10 PTS_PIXELS_RATIO = 4/3 @@ -16,15 +16,21 @@ class FILE_OVERRIDE_BEHAVIORS(Enum): EXIT_IF_IN_SAME_PLACE = 2 EXIT_ANYWHERE = 3 + class WORKSHEET_OVERRIDE_BEHAVIORS(Enum): OVERRIDE = 1 EXIT = 2 + class COLUMN_FORMAT_OPTIONS(Enum): DEFAULT = 1 PERCENT_UNCOLORED = 2 PERCENT_COLORED = 3 + +class CHART_TYPES(Enum): + LINE = "LINE" + DEFAULT_SHEET_FORMATTING_OPTIONS = { "bold_header": True, "center_header": True, @@ -41,7 +47,7 @@ def authenticate_gspread(authentication_response): gc = gspread.authorize(extract_credentials(authentication_response)) return gc -def authenticate_drive_api(authentication_response): +def authenticate_google_api(authentication_response): """Authenticates the Drive API using the response from api.authenticate""" return authentication_response[0] @@ -107,12 +113,12 @@ def search_for_folder_id(drive_api, folder_name, allow_trashed = False, allow_du return [file["id"] for file in files_exact_match] -def create_sheet_in_folder(authentication_response, sheet_name, parent_folder_name=None, override_behavior=FILE_OVERRIDE_BEHAVIORS.EXIT_ANYWHERE): +def create_sheet_in_folder(drive_authentication_response, sheet_name, parent_folder_name=None, override_behavior=FILE_OVERRIDE_BEHAVIORS.EXIT_ANYWHERE): """ Create a new sheet in the project with the given name and parent folder. Returns the new sheet. - :param authentication_response: the service parameters tuple + :param drive_authentication_response: the service parameters tuple :param sheet_name: the name of the new sheet :param parent_folder_name: the name of the parent folder for the new sheet :param override_behavior: the behavior to take if the sheet already exists @@ -120,8 +126,8 @@ def create_sheet_in_folder(authentication_response, sheet_name, parent_folder_na :rtype: gspread.Spreadsheet """ # Build Drive API - gc = authenticate_gspread(authentication_response) - drive_api = authenticate_drive_api(authentication_response) + gc = authenticate_gspread(drive_authentication_response) + drive_api = authenticate_google_api(drive_authentication_response) parent_folder_id = None if parent_folder_name is None else search_for_folder_id(drive_api, parent_folder_name)[0] # Check if sheet already exists and handle based on input @@ -309,4 +315,124 @@ def fill_spreadsheet_with_df_dict(sheet, df_dict, overlapBehavior, sheet_formatt sheet, df, worksheet_name, overlapBehavior, sheet_formatting_options=sheet_formatting_options.get(worksheet_name, DEFAULT_SHEET_FORMATTING_OPTIONS), column_formatting_options=column_formatting_options.get(worksheet_name, {}) - ) \ No newline at end of file + ) + +def update_sheet_raw(sheets_authentication_response, sheet, *updates): + """ + Directly call the Google Sheets api to update the specified sheet with the optional arguments. + """ + assert len(updates) > 0 + sheets_api = authenticate_google_api(sheets_authentication_response) + sheet_id = sheet.id + body = {"requests": list(updates)} + response = ( + sheets_api.spreadsheets() + .batchUpdate(spreadsheetId=sheet_id, body=body) + .execute() + ) + return response + +REQUIRED_CHART_ARGS = [] + +DEFAULT_CHART_ARGS = { + "title": "", + "x_axis_title": "", + "y_axis_title": "", + "chart_position": None # Means it will be created in a new sheet +} + +@dataclass +class WorksheetRange: + worksheet: gspread.worksheet.Worksheet + top_left: gspread.cell.Cell + bottom_right: gspread.cell.Cell + + @property + def range_dict(self): + return { + "sheetId": self.worksheet.id, + "startRowIndex": self.top_left.row - 1, + "endRowIndex": self.bottom_right.row - 1, + "startColumnIndex": self.top_left.col - 1, + "endColumnIndex": self.bottom_right.col - 1, + } + +def _cell_to_grid_coordinate(cell, worksheet): + return { + "sheetId": worksheet.id, + "rowIndex": cell.row - 1, + "columnIndex": cell.col - 1, + } + +def add_chart_to_sheet(sheets_authentication_response, sheet, worksheet, chart_type, domain, series, **chart_args): + complete_chart_args = {**DEFAULT_CHART_ARGS, **chart_args} + print(worksheet.id) + if complete_chart_args["chart_position"] is not None: + position_dict = { + "overlayPosition": { + "anchorCell": _cell_to_grid_coordinate(complete_chart_args["chart_position"], worksheet) + } + } + else: + position_dict = {"newSheet": True} + formatted_domains = [ + { + "domain": { + #TODO: would be nice to also support column references https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets/other#DataSourceColumnReference + "sourceRange": { + "sources": [ + domain.range_dict + ], + }, + }, + }, + ] + formatted_series = [ + { + "series": { + "sourceRange": { + "sources": [ + series_source.range_dict + ], + }, + }, + "targetAxis": "LEFT_AXIS", + } + for series_source in series + ] + formatted_axis = [] + if complete_chart_args["x_axis_title"]: + formatted_axis.append({ + "title": complete_chart_args["x_axis_title"], + "position": "BOTTOM_AXIS", + }) + if complete_chart_args["y_axis_title"]: + formatted_axis.append({ + "title": complete_chart_args["y_axis_title"], + "position": "LEFT_AXIS", + }) + print(formatted_domains) + print(formatted_series) + request = { + "addChart": { + "chart": { + "spec": { + "title": complete_chart_args["title"], + #TODO: insert legend position + #TODO: insert axis positions + "basicChart": { + "axis": formatted_axis, + "chartType": chart_type.value, + "domains": formatted_domains, + "headerCount": 1, #TODO: not sure what this means + "series": formatted_series, + }, + }, + "position": position_dict + }, + }, + } + print(request) + + response = update_sheet_raw(sheets_authentication_response, sheet, request) + return response From dbc8e19294b740ee3948ee45f49464abfb16e9fc Mon Sep 17 00:00:00 2001 From: jpaten Date: Fri, 31 Jan 2025 13:28:04 -0800 Subject: [PATCH 2/5] feat: added change over time sheets to analytics package (#4353) --- .../analytics_package/analytics/charts.py | 9 ++- .../analytics_package/analytics/sheets_api.py | 56 ++++++++++++------- .../analytics/sheets_elements.py | 41 +++++++++++++- 3 files changed, 83 insertions(+), 23 deletions(-) diff --git a/analytics/analytics_package/analytics/charts.py b/analytics/analytics_package/analytics/charts.py index ba1313e35..827017e6f 100644 --- a/analytics/analytics_package/analytics/charts.py +++ b/analytics/analytics_package/analytics/charts.py @@ -416,8 +416,8 @@ def show_plot(df, title, fontsize=16, **other_params): fig.suptitle(title, fontsize=fontsize) plt.show() -def show_plot_over_time(titles, xlabels, metrics, dimensions="ga:date", format_table=True, df_filter=None, pre_plot_df_processor=None, **other_params): - titles, xlabels, metrics = strings_to_lists(titles, xlabels, metrics) +def get_df_over_time(xlabels, metrics, dimensions, df_filter=None, **other_params): + xlabels, metrics = strings_to_lists(xlabels, metrics) df = get_data_df(metrics, dimensions, **other_params) @@ -430,6 +430,11 @@ def show_plot_over_time(titles, xlabels, metrics, dimensions="ga:date", format_t # Rename for display df.rename(columns={name: xlabels[i] for i, name in enumerate(df.columns)}, inplace=True) + return df + +def show_plot_over_time(titles, xlabels, metrics, dimensions="ga:date", format_table=True, df_filter=None, pre_plot_df_processor=None, **other_params): + df = get_df_over_time(xlabels, metrics, dimensions, df_filter=df_filter, **other_params) + if (not pre_plot_df_processor is None): df = pre_plot_df_processor(df) diff --git a/analytics/analytics_package/analytics/sheets_api.py b/analytics/analytics_package/analytics/sheets_api.py index 407f8fe06..999c443c5 100644 --- a/analytics/analytics_package/analytics/sheets_api.py +++ b/analytics/analytics_package/analytics/sheets_api.py @@ -3,6 +3,7 @@ import gspread import gspread_formatting from enum import Enum +import pandas as pd FONT_SIZE_PTS = 10 PTS_PIXELS_RATIO = 4/3 @@ -35,7 +36,9 @@ class CHART_TYPES(Enum): "bold_header": True, "center_header": True, "freeze_header": True, - "column_widths": {"justify": True, "buffer_chars": DEFAULT_BUFFER_CHARS} + "column_widths": {"justify": True, "buffer_chars": DEFAULT_BUFFER_CHARS}, + "extra_columns": 0, + "extra_columns_width": 50, } def extract_credentials(authentication_response): @@ -169,7 +172,7 @@ def fill_worksheet_with_df( df, worksheet_name, overlapBehavior, - sheet_formatting_options=DEFAULT_SHEET_FORMATTING_OPTIONS, + sheet_formatting_options={}, column_formatting_options={} ): """ @@ -199,38 +202,47 @@ def fill_worksheet_with_df( title=worksheet_name, rows=df.shape[0], cols=df.shape[1] ) + sheet_formatting_options_filled = {**DEFAULT_SHEET_FORMATTING_OPTIONS, **sheet_formatting_options} + + # Add extra blank columns to the right of the worksheet + df_to_insert = pd.concat( + [df] + [pd.Series(" ", index=df.index, name="")] * sheet_formatting_options_filled["extra_columns"], + axis=1 + ) # Add data to worksheet - worksheet.update([df.columns.values.tolist()] + df.fillna("NA").values.tolist()) + worksheet.update([df_to_insert.columns.values.tolist()] + df_to_insert.fillna("NA").values.tolist()) # Format worksheet # Justify Column Widths - if "column_widths" not in sheet_formatting_options or sheet_formatting_options["column_widths"]["justify"]: + if "column_widths" not in sheet_formatting_options_filled or sheet_formatting_options_filled["column_widths"]["justify"]: text_widths = df.astype(str).columns.map( lambda column_name: df[column_name].astype(str).str.len().max() ) header_widths = df.columns.str.len() buffer_chars = ( DEFAULT_BUFFER_CHARS - if ("column_widths" not in sheet_formatting_options or "buffer_chars" not in sheet_formatting_options["column_widths"]) - else sheet_formatting_options["column_widths"]["buffer_chars"] + if ("column_widths" not in sheet_formatting_options_filled or "buffer_chars" not in sheet_formatting_options_filled["column_widths"]) + else sheet_formatting_options_filled["column_widths"]["buffer_chars"] ) - column_widths = [ + data_column_widths = [ round((max(len_tuple) + buffer_chars) * FONT_SIZE_PTS * 1/PTS_PIXELS_RATIO) for len_tuple in zip(text_widths, header_widths) ] + extra_column_widths = [sheet_formatting_options_filled["extra_columns_width"]] * sheet_formatting_options_filled["extra_columns"] + combined_column_widths = data_column_widths + extra_column_widths column_positions = [ - gspread.utils.rowcol_to_a1(1, i + 1)[0] for i, _ in enumerate(column_widths) + gspread.utils.rowcol_to_a1(1, i + 1)[0] for i, _ in enumerate(combined_column_widths) ] - gspread_formatting.set_column_widths(worksheet, zip(column_positions, column_widths)) + gspread_formatting.set_column_widths(worksheet, zip(column_positions, combined_column_widths)) # Freeze Header - if "freeze_header" not in sheet_formatting_options or sheet_formatting_options["freeze_header"]: + if "freeze_header" not in sheet_formatting_options_filled or sheet_formatting_options_filled["freeze_header"]: gspread_formatting.set_frozen(worksheet, rows=1) base_format_options = gspread_formatting.CellFormat() # Bold Header - if "bold_header" not in sheet_formatting_options or sheet_formatting_options["bold_header"]: + if "bold_header" not in sheet_formatting_options_filled or sheet_formatting_options_filled["bold_header"]: base_format_options += gspread_formatting.CellFormat(textFormat=gspread_formatting.TextFormat(bold=True)) # Center Header - if "center_header" not in sheet_formatting_options or sheet_formatting_options["center_header"]: + if "center_header" not in sheet_formatting_options_filled or sheet_formatting_options_filled["center_header"]: base_format_options += gspread_formatting.CellFormat(horizontalAlignment="CENTER") # Handle column specific formatting for column in column_formatting_options: @@ -313,7 +325,7 @@ def fill_spreadsheet_with_df_dict(sheet, df_dict, overlapBehavior, sheet_formatt for worksheet_name, df in df_dict.items(): fill_worksheet_with_df( sheet, df, worksheet_name, overlapBehavior, - sheet_formatting_options=sheet_formatting_options.get(worksheet_name, DEFAULT_SHEET_FORMATTING_OPTIONS), + sheet_formatting_options=sheet_formatting_options.get(worksheet_name, {}), column_formatting_options=column_formatting_options.get(worksheet_name, {}) ) @@ -338,7 +350,12 @@ def update_sheet_raw(sheets_authentication_response, sheet, *updates): "title": "", "x_axis_title": "", "y_axis_title": "", - "chart_position": None # Means it will be created in a new sheet + "invert_x_axis": False, + "chart_position": None, # None means it will be created in a new sheet + "chart_position_offset_x": 0, + "chart_position_offset_y": 0, + "chart_width": 600, + "chart_height": 371, } @dataclass @@ -366,11 +383,14 @@ def _cell_to_grid_coordinate(cell, worksheet): def add_chart_to_sheet(sheets_authentication_response, sheet, worksheet, chart_type, domain, series, **chart_args): complete_chart_args = {**DEFAULT_CHART_ARGS, **chart_args} - print(worksheet.id) if complete_chart_args["chart_position"] is not None: position_dict = { "overlayPosition": { - "anchorCell": _cell_to_grid_coordinate(complete_chart_args["chart_position"], worksheet) + "anchorCell": _cell_to_grid_coordinate(complete_chart_args["chart_position"], worksheet), + "offsetXPixels": complete_chart_args["chart_position_offset_x"], + "offsetYPixels": complete_chart_args["chart_position_offset_y"], + "widthPixels": complete_chart_args["chart_width"], + "heightPixels": complete_chart_args["chart_height"], } } else: @@ -385,6 +405,7 @@ def add_chart_to_sheet(sheets_authentication_response, sheet, worksheet, chart_t ], }, }, + "reversed": complete_chart_args["invert_x_axis"], }, ] formatted_series = [ @@ -411,8 +432,6 @@ def add_chart_to_sheet(sheets_authentication_response, sheet, worksheet, chart_t "title": complete_chart_args["y_axis_title"], "position": "LEFT_AXIS", }) - print(formatted_domains) - print(formatted_series) request = { "addChart": { "chart": { @@ -432,7 +451,6 @@ def add_chart_to_sheet(sheets_authentication_response, sheet, worksheet, chart_t }, }, } - print(request) response = update_sheet_raw(sheets_authentication_response, sheet, request) return response diff --git a/analytics/analytics_package/analytics/sheets_elements.py b/analytics/analytics_package/analytics/sheets_elements.py index 5a99e6d12..7e3c2c831 100644 --- a/analytics/analytics_package/analytics/sheets_elements.py +++ b/analytics/analytics_package/analytics/sheets_elements.py @@ -1,6 +1,7 @@ +from enum import Enum import numpy as np import pandas as pd -from .charts import get_data_df +from .charts import get_data_df, get_df_over_time from .fields import * from urllib.parse import urlparse import datetime as dt @@ -225,4 +226,40 @@ def get_change(series_current, series_previous, start_current, end_current, star # Adjust the values from the prior series to account for the different number of days in the month series_previous_reindexed = (series_previous.reindex(combined_index) * current_length / previous_length) change = ((series_current_reindexed / series_previous_reindexed) - 1).replace({np.inf: np.nan}) - return change \ No newline at end of file + return change + +class ADDITIONAL_DATA_BEHAVIOR(Enum): + ADD = "add" + REPLACE = "replace" + +def get_change_over_time_df( + analytics_params, include_changes=True, additional_data_path=None, additional_data_behavior=None +): + df_api = get_df_over_time( + ["Users", "Total Pageviews"], + ["activeUsers", "screenPageViews"], + "yearMonth", + sort_results=["yearMonth"], + df_processor=(lambda df: df.set_index(df.index + "01")[-2::-1]), + format_table=False, + **analytics_params + ) + + df_combined = pd.DataFrame() + + if additional_data_path is not None: + assert additional_data_behavior is not None + df_saved = pd.read_json(additional_data_path) + if additional_data_behavior == ADDITIONAL_DATA_BEHAVIOR.ADD: + df_combined = df_api.add(df_saved.astype(int), fill_value=0)[::-1] + elif additional_data_behavior == ADDITIONAL_DATA_BEHAVIOR.REPLACE: + df_combined = pd.concat([df_saved, df_api], ignore_index=False) + df_combined = df_combined.loc[~df_combined.index.duplicated(keep="first")].sort_index(ascending=False) + else: + df_combined = df_api + + if include_changes: + df_combined["Users Change"] = df_combined["Users"].pct_change() + df_combined["Total Pageviews Change"] = df_combined["Total Pageviews"].pct_change() + + return df_combined From 38a1258a2d7f04e61b513c5694c60db79eeaa6fa Mon Sep 17 00:00:00 2001 From: jpaten Date: Fri, 31 Jan 2025 15:24:12 -0800 Subject: [PATCH 3/5] feat: added datetime handling to analytics sheets package (#4353) --- .../analytics_package/analytics/sheets_api.py | 25 +++++++-- .../analytics/sheets_elements.py | 56 +++++++++++++++---- 2 files changed, 67 insertions(+), 14 deletions(-) diff --git a/analytics/analytics_package/analytics/sheets_api.py b/analytics/analytics_package/analytics/sheets_api.py index 999c443c5..291680dea 100644 --- a/analytics/analytics_package/analytics/sheets_api.py +++ b/analytics/analytics_package/analytics/sheets_api.py @@ -27,6 +27,7 @@ class COLUMN_FORMAT_OPTIONS(Enum): DEFAULT = 1 PERCENT_UNCOLORED = 2 PERCENT_COLORED = 3 + YEAR_MONTH_DATE = 4 class CHART_TYPES(Enum): @@ -41,6 +42,10 @@ class CHART_TYPES(Enum): "extra_columns_width": 50, } +DEFAULT_GSPREAD_UPDATE_ARGS = { + "value_input_option": gspread.utils.ValueInputOption.user_entered, +} + def extract_credentials(authentication_response): """Extracts the credentials from the tuple from api.authenticate""" return authentication_response[3] @@ -173,7 +178,8 @@ def fill_worksheet_with_df( worksheet_name, overlapBehavior, sheet_formatting_options={}, - column_formatting_options={} + column_formatting_options={}, + **gspread_update_args ): """ Fill a worksheet with the contents of a DataFrame. @@ -210,7 +216,10 @@ def fill_worksheet_with_df( axis=1 ) # Add data to worksheet - worksheet.update([df_to_insert.columns.values.tolist()] + df_to_insert.fillna("NA").values.tolist()) + worksheet.update( + [df_to_insert.columns.values.tolist()] + df_to_insert.fillna("NA").values.tolist(), + **{**DEFAULT_GSPREAD_UPDATE_ARGS, **gspread_update_args} + ) # Format worksheet # Justify Column Widths @@ -287,6 +296,13 @@ def fill_worksheet_with_df( column_range, gspread_formatting.CellFormat(numberFormat=gspread_formatting.NumberFormat(type='PERCENT', pattern='0.0%')) ) + if column_formatting_options[column] == COLUMN_FORMAT_OPTIONS.YEAR_MONTH_DATE: + # Apply date format rule + gspread_formatting.format_cell_range( + worksheet, + column_range, + gspread_formatting.CellFormat(numberFormat=gspread_formatting.NumberFormat(type='DATE', pattern='yyyy-mm')) + ) # Apply base formatting options gspread_formatting.format_cell_range( @@ -299,7 +315,7 @@ def fill_worksheet_with_df( if "Sheet1" in [i.title for i in sheet.worksheets()]: sheet.del_worksheet(sheet.worksheet("Sheet1")) -def fill_spreadsheet_with_df_dict(sheet, df_dict, overlapBehavior, sheet_formatting_options={}, column_formatting_options={}): +def fill_spreadsheet_with_df_dict(sheet, df_dict, overlapBehavior, sheet_formatting_options={}, column_formatting_options={}, **gspread_update_args): """ Fill a sheet with the contents of a dictionary of DataFrames. The keys of the dictionary are the names of the worksheets, and the values contain the data to be placed in the sheet. @@ -326,7 +342,8 @@ def fill_spreadsheet_with_df_dict(sheet, df_dict, overlapBehavior, sheet_formatt fill_worksheet_with_df( sheet, df, worksheet_name, overlapBehavior, sheet_formatting_options=sheet_formatting_options.get(worksheet_name, {}), - column_formatting_options=column_formatting_options.get(worksheet_name, {}) + column_formatting_options=column_formatting_options.get(worksheet_name, {}), + **gspread_update_args ) def update_sheet_raw(sheets_authentication_response, sheet, *updates): diff --git a/analytics/analytics_package/analytics/sheets_elements.py b/analytics/analytics_package/analytics/sheets_elements.py index 7e3c2c831..0ae75349c 100644 --- a/analytics/analytics_package/analytics/sheets_elements.py +++ b/analytics/analytics_package/analytics/sheets_elements.py @@ -232,17 +232,47 @@ class ADDITIONAL_DATA_BEHAVIOR(Enum): ADD = "add" REPLACE = "replace" +def get_page_views_over_time_df(analytics_params, additional_data_path=None, additional_data_behavior=None): + """ + Get a DataFrame with pageviews and total active users over time from the Analytics API. + :param analytics_params: the parameters for the Analytics API, including service params, start dates, and end dates + :param additional_data_path: the path to a JSON file with additional data to be added to the DataFrame, defaults to None + :param additional_data_behavior: the behavior to use when adding the additional data, defaults to None + """ + return get_change_over_time_df( + ["Users", "Total Pageviews"], + ["activeUsers", "screenPageViews"], + ["Month"], + "yearMonth", + additional_data_path=additional_data_path, + additional_data_behavior=additional_data_behavior, + **analytics_params + ) + def get_change_over_time_df( - analytics_params, include_changes=True, additional_data_path=None, additional_data_behavior=None + metric_titles, metrics, time_title, time_dimension, include_changes=True, change_title_suffix = " Change", additional_data_path=None, additional_data_behavior=None, strftime_format="%Y-%m", **other_params ): + """ + Get a DataFrame with the change over time for the given metrics, renamed to match metric_titles + :param metric_titles: the titles of the metrics to be displayed + :param metrics: the metrics to be displayed + :param time_title: the title to be displayed for the time dimension + :param time_dimension: the time dimension to be displayed + :param include_changes: whether to include the percent change columns, defaults to True + :param change_title_suffix: the suffix to be added to the change columns, defaults to " Change" + :param additional_data_path: the path to a JSON file with additional data to be added to the DataFrame, defaults to None + :param additional_data_behavior: the behavior to use when adding the additional data, defaults to None + :param strftime_format: the format to use for the time dimension, defaults to "%Y-%m". None means a datetime will be returned + :param other_params: any other parameters to be passed to the get_df_over_time function, including service params + """ df_api = get_df_over_time( - ["Users", "Total Pageviews"], - ["activeUsers", "screenPageViews"], - "yearMonth", - sort_results=["yearMonth"], + metric_titles, + metrics, + time_dimension, + sort_results=[time_dimension], df_processor=(lambda df: df.set_index(df.index + "01")[-2::-1]), format_table=False, - **analytics_params + **other_params ) df_combined = pd.DataFrame() @@ -259,7 +289,13 @@ def get_change_over_time_df( df_combined = df_api if include_changes: - df_combined["Users Change"] = df_combined["Users"].pct_change() - df_combined["Total Pageviews Change"] = df_combined["Total Pageviews"].pct_change() - - return df_combined + assert change_title_suffix is not None + df_combined[ + [f"{title}{change_title_suffix}" for title in metric_titles] + ] = df_combined[metric_titles].pct_change(periods=-1).replace({np.inf: np.nan}) + + if strftime_format is not None: + df_combined.index = pd.to_datetime(df_combined.index).strftime(strftime_format) + + return df_combined.reset_index(names=time_title) + \ No newline at end of file From 28d11fa95794fe751cb33e5f7ce2aac4c1476086 Mon Sep 17 00:00:00 2001 From: jpaten Date: Fri, 31 Jan 2025 16:49:37 -0800 Subject: [PATCH 4/5] chore: added missing docstrings in analytics package (#4353) --- analytics/analytics_package/analytics/api.py | 1 - .../analytics_package/analytics/sheets_api.py | 19 ++++++++++++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/analytics/analytics_package/analytics/api.py b/analytics/analytics_package/analytics/api.py index 378fc6174..86f8dd9b5 100644 --- a/analytics/analytics_package/analytics/api.py +++ b/analytics/analytics_package/analytics/api.py @@ -297,7 +297,6 @@ def build_params(source, subs): def results_to_df(results): - df = pd.DataFrame() for result in results: # Collect column nmes diff --git a/analytics/analytics_package/analytics/sheets_api.py b/analytics/analytics_package/analytics/sheets_api.py index 291680dea..36f98c5d7 100644 --- a/analytics/analytics_package/analytics/sheets_api.py +++ b/analytics/analytics_package/analytics/sheets_api.py @@ -377,12 +377,19 @@ def update_sheet_raw(sheets_authentication_response, sheet, *updates): @dataclass class WorksheetRange: + """ + A dataclass to represent a range of cells in a worksheet in the one-sided interval [top_left, bottom_right). + :param worksheet: the gspread.worksheet.Worksheet object + :param top_left: the top left cell of the range. This cell will be included in the range + :param bottom_right: the bottom right cell of the range. This cell will not be included in the range + """ worksheet: gspread.worksheet.Worksheet top_left: gspread.cell.Cell bottom_right: gspread.cell.Cell @property def range_dict(self): + """The range as a dictionary for the sources field in the Google Sheets api""" return { "sheetId": self.worksheet.id, "startRowIndex": self.top_left.row - 1, @@ -399,6 +406,16 @@ def _cell_to_grid_coordinate(cell, worksheet): } def add_chart_to_sheet(sheets_authentication_response, sheet, worksheet, chart_type, domain, series, **chart_args): + """ + Add a chart to a specified workshet + :param sheets_authentication_response: the response from ga.authenticate. Must be for the sheets api v4 + :param sheet: the gspread.Spreadsheet object + :param worksheet: the gspread.Worksheet object + :param chart_type: the type of chart to add + :param domain: the domain of the chart as a WorksheetRange. Must contain either one row or one column + :param series: the series of the chart as a WorksheetRange. Must contain either one row or one column + :param chart_args: other arguments to create the chart. See DEFAULT_CHART_ARGS + """ complete_chart_args = {**DEFAULT_CHART_ARGS, **chart_args} if complete_chart_args["chart_position"] is not None: position_dict = { @@ -415,7 +432,6 @@ def add_chart_to_sheet(sheets_authentication_response, sheet, worksheet, chart_t formatted_domains = [ { "domain": { - #TODO: would be nice to also support column references https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets/other#DataSourceColumnReference "sourceRange": { "sources": [ domain.range_dict @@ -425,6 +441,7 @@ def add_chart_to_sheet(sheets_authentication_response, sheet, worksheet, chart_t "reversed": complete_chart_args["invert_x_axis"], }, ] + formatted_series = [ { "series": { From 4c417b57aa4da3a4f384a05a949eceeb251d2932 Mon Sep 17 00:00:00 2001 From: jpaten Date: Fri, 31 Jan 2025 16:54:47 -0800 Subject: [PATCH 5/5] chore: bumped setup.py (#4353) --- analytics/analytics_package/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/analytics/analytics_package/setup.py b/analytics/analytics_package/setup.py index 80cf2b71d..946b0f2c6 100644 --- a/analytics/analytics_package/setup.py +++ b/analytics/analytics_package/setup.py @@ -2,7 +2,7 @@ setup( name="analytics", - version="3.3.1", + version="3.4.0", packages=["analytics"], install_requires=["matplotlib", "pandas", "numpy", "google-auth-oauthlib", "google-api-python-client", "gspread", "gspread-formatting"], ) \ No newline at end of file