Skip to content

Commit

Permalink
feat: added datetime handling to analytics sheets package (#4353)
Browse files Browse the repository at this point in the history
  • Loading branch information
jpaten committed Jan 31, 2025
1 parent dbc8e19 commit 38a1258
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 14 deletions.
25 changes: 21 additions & 4 deletions analytics/analytics_package/analytics/sheets_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ class COLUMN_FORMAT_OPTIONS(Enum):
DEFAULT = 1
PERCENT_UNCOLORED = 2
PERCENT_COLORED = 3
YEAR_MONTH_DATE = 4


class CHART_TYPES(Enum):
Expand All @@ -41,6 +42,10 @@ class CHART_TYPES(Enum):
"extra_columns_width": 50,
}

# Baseline keyword arguments for worksheet.update in fill_worksheet_with_df;
# keyword arguments supplied by the caller are merged on top and take precedence.
DEFAULT_GSPREAD_UPDATE_ARGS = dict(
    value_input_option=gspread.utils.ValueInputOption.user_entered,
)

def extract_credentials(authentication_response):
    """
    Pull the credentials out of the value returned by api.authenticate.
    :param authentication_response: the tuple returned by api.authenticate
    :return: the element stored at index 3 of that tuple
    """
    credentials_position = 3
    return authentication_response[credentials_position]
Expand Down Expand Up @@ -173,7 +178,8 @@ def fill_worksheet_with_df(
worksheet_name,
overlapBehavior,
sheet_formatting_options={},
column_formatting_options={}
column_formatting_options={},
**gspread_update_args
):
"""
Fill a worksheet with the contents of a DataFrame.
Expand Down Expand Up @@ -210,7 +216,10 @@ def fill_worksheet_with_df(
axis=1
)
# Add data to worksheet
worksheet.update([df_to_insert.columns.values.tolist()] + df_to_insert.fillna("NA").values.tolist())
worksheet.update(
[df_to_insert.columns.values.tolist()] + df_to_insert.fillna("NA").values.tolist(),
**{**DEFAULT_GSPREAD_UPDATE_ARGS, **gspread_update_args}
)

# Format worksheet
# Justify Column Widths
Expand Down Expand Up @@ -287,6 +296,13 @@ def fill_worksheet_with_df(
column_range,
gspread_formatting.CellFormat(numberFormat=gspread_formatting.NumberFormat(type='PERCENT', pattern='0.0%'))
)
if column_formatting_options[column] == COLUMN_FORMAT_OPTIONS.YEAR_MONTH_DATE:
# Apply date format rule
gspread_formatting.format_cell_range(
worksheet,
column_range,
gspread_formatting.CellFormat(numberFormat=gspread_formatting.NumberFormat(type='DATE', pattern='yyyy-mm'))
)

# Apply base formatting options
gspread_formatting.format_cell_range(
Expand All @@ -299,7 +315,7 @@ def fill_worksheet_with_df(
if "Sheet1" in [i.title for i in sheet.worksheets()]:
sheet.del_worksheet(sheet.worksheet("Sheet1"))

def fill_spreadsheet_with_df_dict(sheet, df_dict, overlapBehavior, sheet_formatting_options={}, column_formatting_options={}):
def fill_spreadsheet_with_df_dict(sheet, df_dict, overlapBehavior, sheet_formatting_options={}, column_formatting_options={}, **gspread_update_args):
"""
Fill a sheet with the contents of a dictionary of DataFrames.
The keys of the dictionary are the names of the worksheets, and the values contain the data to be placed in the sheet.
Expand All @@ -326,7 +342,8 @@ def fill_spreadsheet_with_df_dict(sheet, df_dict, overlapBehavior, sheet_formatt
fill_worksheet_with_df(
sheet, df, worksheet_name, overlapBehavior,
sheet_formatting_options=sheet_formatting_options.get(worksheet_name, {}),
column_formatting_options=column_formatting_options.get(worksheet_name, {})
column_formatting_options=column_formatting_options.get(worksheet_name, {}),
**gspread_update_args
)

def update_sheet_raw(sheets_authentication_response, sheet, *updates):
Expand Down
56 changes: 46 additions & 10 deletions analytics/analytics_package/analytics/sheets_elements.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,17 +232,47 @@ class ADDITIONAL_DATA_BEHAVIOR(Enum):
ADD = "add"
REPLACE = "replace"

def get_page_views_over_time_df(analytics_params, additional_data_path=None, additional_data_behavior=None):
    """
    Build the users/pageviews-over-time DataFrame by delegating to get_change_over_time_df.
    The "activeUsers" and "screenPageViews" metrics are requested along the "yearMonth" dimension
    and presented under the titles "Users", "Total Pageviews", and "Month".
    :param analytics_params: the parameters for the Analytics API, including service params, start dates, and end dates
    :param additional_data_path: the path to a JSON file with additional data to be added to the DataFrame, defaults to None
    :param additional_data_behavior: the behavior to use when adding the additional data, defaults to None
    :return: whatever get_change_over_time_df returns for these metrics
    """
    metric_titles = ["Users", "Total Pageviews"]
    metrics = ["activeUsers", "screenPageViews"]
    time_title = ["Month"]
    time_dimension = "yearMonth"

    page_views_df = get_change_over_time_df(
        metric_titles,
        metrics,
        time_title,
        time_dimension,
        additional_data_path=additional_data_path,
        additional_data_behavior=additional_data_behavior,
        **analytics_params
    )
    return page_views_df

def get_change_over_time_df(
analytics_params, include_changes=True, additional_data_path=None, additional_data_behavior=None
metric_titles, metrics, time_title, time_dimension, include_changes=True, change_title_suffix = " Change", additional_data_path=None, additional_data_behavior=None, strftime_format="%Y-%m", **other_params
):
"""
Get a DataFrame with the change over time for the given metrics, renamed to match metric_titles
:param metric_titles: the titles of the metrics to be displayed
:param metrics: the metrics to be displayed
:param time_title: the title to be displayed for the time dimension
:param time_dimension: the time dimension to be displayed
:param include_changes: whether to include the percent change columns, defaults to True
:param change_title_suffix: the suffix to be added to the change columns, defaults to " Change"
:param additional_data_path: the path to a JSON file with additional data to be added to the DataFrame, defaults to None
:param additional_data_behavior: the behavior to use when adding the additional data, defaults to None
:param strftime_format: the format to use for the time dimension, defaults to "%Y-%m". None means a datetime will be returned
:param other_params: any other parameters to be passed to the get_df_over_time function, including service params
"""
df_api = get_df_over_time(
["Users", "Total Pageviews"],
["activeUsers", "screenPageViews"],
"yearMonth",
sort_results=["yearMonth"],
metric_titles,
metrics,
time_dimension,
sort_results=[time_dimension],
df_processor=(lambda df: df.set_index(df.index + "01")[-2::-1]),
format_table=False,
**analytics_params
**other_params
)

df_combined = pd.DataFrame()
Expand All @@ -259,7 +289,13 @@ def get_change_over_time_df(
df_combined = df_api

if include_changes:
df_combined["Users Change"] = df_combined["Users"].pct_change()
df_combined["Total Pageviews Change"] = df_combined["Total Pageviews"].pct_change()

return df_combined
assert change_title_suffix is not None
df_combined[
[f"{title}{change_title_suffix}" for title in metric_titles]
] = df_combined[metric_titles].pct_change(periods=-1).replace({np.inf: np.nan})

if strftime_format is not None:
df_combined.index = pd.to_datetime(df_combined.index).strftime(strftime_format)

return df_combined.reset_index(names=time_title)

0 comments on commit 38a1258

Please sign in to comment.