Skip to content

Commit dbc8e19

Browse files
author
jpaten
committed
feat: added change over time sheets to analytics package (#4353)
1 parent c124ac9 commit dbc8e19

File tree

3 files changed

+83
-23
lines changed

3 files changed

+83
-23
lines changed

analytics/analytics_package/analytics/charts.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -416,8 +416,8 @@ def show_plot(df, title, fontsize=16, **other_params):
416416
fig.suptitle(title, fontsize=fontsize)
417417
plt.show()
418418

419-
def show_plot_over_time(titles, xlabels, metrics, dimensions="ga:date", format_table=True, df_filter=None, pre_plot_df_processor=None, **other_params):
420-
titles, xlabels, metrics = strings_to_lists(titles, xlabels, metrics)
419+
def get_df_over_time(xlabels, metrics, dimensions, df_filter=None, **other_params):
420+
xlabels, metrics = strings_to_lists(xlabels, metrics)
421421

422422
df = get_data_df(metrics, dimensions, **other_params)
423423

@@ -430,6 +430,11 @@ def show_plot_over_time(titles, xlabels, metrics, dimensions="ga:date", format_t
430430
# Rename for display
431431
df.rename(columns={name: xlabels[i] for i, name in enumerate(df.columns)}, inplace=True)
432432

433+
return df
434+
435+
def show_plot_over_time(titles, xlabels, metrics, dimensions="ga:date", format_table=True, df_filter=None, pre_plot_df_processor=None, **other_params):
436+
df = get_df_over_time(xlabels, metrics, dimensions, df_filter=df_filter, **other_params)
437+
433438
if (not pre_plot_df_processor is None):
434439
df = pre_plot_df_processor(df)
435440

analytics/analytics_package/analytics/sheets_api.py

Lines changed: 37 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import gspread
44
import gspread_formatting
55
from enum import Enum
6+
import pandas as pd
67

78
FONT_SIZE_PTS = 10
89
PTS_PIXELS_RATIO = 4/3
@@ -35,7 +36,9 @@ class CHART_TYPES(Enum):
3536
"bold_header": True,
3637
"center_header": True,
3738
"freeze_header": True,
38-
"column_widths": {"justify": True, "buffer_chars": DEFAULT_BUFFER_CHARS}
39+
"column_widths": {"justify": True, "buffer_chars": DEFAULT_BUFFER_CHARS},
40+
"extra_columns": 0,
41+
"extra_columns_width": 50,
3942
}
4043

4144
def extract_credentials(authentication_response):
@@ -169,7 +172,7 @@ def fill_worksheet_with_df(
169172
df,
170173
worksheet_name,
171174
overlapBehavior,
172-
sheet_formatting_options=DEFAULT_SHEET_FORMATTING_OPTIONS,
175+
sheet_formatting_options={},
173176
column_formatting_options={}
174177
):
175178
"""
@@ -199,38 +202,47 @@ def fill_worksheet_with_df(
199202
title=worksheet_name, rows=df.shape[0], cols=df.shape[1]
200203
)
201204

205+
sheet_formatting_options_filled = {**DEFAULT_SHEET_FORMATTING_OPTIONS, **sheet_formatting_options}
206+
207+
# Add extra blank columns to the right of the worksheet
208+
df_to_insert = pd.concat(
209+
[df] + [pd.Series(" ", index=df.index, name="")] * sheet_formatting_options_filled["extra_columns"],
210+
axis=1
211+
)
202212
# Add data to worksheet
203-
worksheet.update([df.columns.values.tolist()] + df.fillna("NA").values.tolist())
213+
worksheet.update([df_to_insert.columns.values.tolist()] + df_to_insert.fillna("NA").values.tolist())
204214

205215
# Format worksheet
206216
# Justify Column Widths
207-
if "column_widths" not in sheet_formatting_options or sheet_formatting_options["column_widths"]["justify"]:
217+
if "column_widths" not in sheet_formatting_options_filled or sheet_formatting_options_filled["column_widths"]["justify"]:
208218
text_widths = df.astype(str).columns.map(
209219
lambda column_name: df[column_name].astype(str).str.len().max()
210220
)
211221
header_widths = df.columns.str.len()
212222
buffer_chars = (
213223
DEFAULT_BUFFER_CHARS
214-
if ("column_widths" not in sheet_formatting_options or "buffer_chars" not in sheet_formatting_options["column_widths"])
215-
else sheet_formatting_options["column_widths"]["buffer_chars"]
224+
if ("column_widths" not in sheet_formatting_options_filled or "buffer_chars" not in sheet_formatting_options_filled["column_widths"])
225+
else sheet_formatting_options_filled["column_widths"]["buffer_chars"]
216226
)
217-
column_widths = [
227+
data_column_widths = [
218228
round((max(len_tuple) + buffer_chars) * FONT_SIZE_PTS * 1/PTS_PIXELS_RATIO)
219229
for len_tuple in zip(text_widths, header_widths)
220230
]
231+
extra_column_widths = [sheet_formatting_options_filled["extra_columns_width"]] * sheet_formatting_options_filled["extra_columns"]
232+
combined_column_widths = data_column_widths + extra_column_widths
221233
column_positions = [
222-
gspread.utils.rowcol_to_a1(1, i + 1)[0] for i, _ in enumerate(column_widths)
234+
gspread.utils.rowcol_to_a1(1, i + 1)[0] for i, _ in enumerate(combined_column_widths)
223235
]
224-
gspread_formatting.set_column_widths(worksheet, zip(column_positions, column_widths))
236+
gspread_formatting.set_column_widths(worksheet, zip(column_positions, combined_column_widths))
225237
# Freeze Header
226-
if "freeze_header" not in sheet_formatting_options or sheet_formatting_options["freeze_header"]:
238+
if "freeze_header" not in sheet_formatting_options_filled or sheet_formatting_options_filled["freeze_header"]:
227239
gspread_formatting.set_frozen(worksheet, rows=1)
228240
base_format_options = gspread_formatting.CellFormat()
229241
# Bold Header
230-
if "bold_header" not in sheet_formatting_options or sheet_formatting_options["bold_header"]:
242+
if "bold_header" not in sheet_formatting_options_filled or sheet_formatting_options_filled["bold_header"]:
231243
base_format_options += gspread_formatting.CellFormat(textFormat=gspread_formatting.TextFormat(bold=True))
232244
# Center Header
233-
if "center_header" not in sheet_formatting_options or sheet_formatting_options["center_header"]:
245+
if "center_header" not in sheet_formatting_options_filled or sheet_formatting_options_filled["center_header"]:
234246
base_format_options += gspread_formatting.CellFormat(horizontalAlignment="CENTER")
235247
# Handle column specific formatting
236248
for column in column_formatting_options:
@@ -313,7 +325,7 @@ def fill_spreadsheet_with_df_dict(sheet, df_dict, overlapBehavior, sheet_formatt
313325
for worksheet_name, df in df_dict.items():
314326
fill_worksheet_with_df(
315327
sheet, df, worksheet_name, overlapBehavior,
316-
sheet_formatting_options=sheet_formatting_options.get(worksheet_name, DEFAULT_SHEET_FORMATTING_OPTIONS),
328+
sheet_formatting_options=sheet_formatting_options.get(worksheet_name, {}),
317329
column_formatting_options=column_formatting_options.get(worksheet_name, {})
318330
)
319331

@@ -338,7 +350,12 @@ def update_sheet_raw(sheets_authentication_response, sheet, *updates):
338350
"title": "",
339351
"x_axis_title": "",
340352
"y_axis_title": "",
341-
"chart_position": None # Means it will be created in a new sheet
353+
"invert_x_axis": False,
354+
"chart_position": None, # None means it will be created in a new sheet
355+
"chart_position_offset_x": 0,
356+
"chart_position_offset_y": 0,
357+
"chart_width": 600,
358+
"chart_height": 371,
342359
}
343360

344361
@dataclass
@@ -366,11 +383,14 @@ def _cell_to_grid_coordinate(cell, worksheet):
366383

367384
def add_chart_to_sheet(sheets_authentication_response, sheet, worksheet, chart_type, domain, series, **chart_args):
368385
complete_chart_args = {**DEFAULT_CHART_ARGS, **chart_args}
369-
print(worksheet.id)
370386
if complete_chart_args["chart_position"] is not None:
371387
position_dict = {
372388
"overlayPosition": {
373-
"anchorCell": _cell_to_grid_coordinate(complete_chart_args["chart_position"], worksheet)
389+
"anchorCell": _cell_to_grid_coordinate(complete_chart_args["chart_position"], worksheet),
390+
"offsetXPixels": complete_chart_args["chart_position_offset_x"],
391+
"offsetYPixels": complete_chart_args["chart_position_offset_y"],
392+
"widthPixels": complete_chart_args["chart_width"],
393+
"heightPixels": complete_chart_args["chart_height"],
374394
}
375395
}
376396
else:
@@ -385,6 +405,7 @@ def add_chart_to_sheet(sheets_authentication_response, sheet, worksheet, chart_t
385405
],
386406
},
387407
},
408+
"reversed": complete_chart_args["invert_x_axis"],
388409
},
389410
]
390411
formatted_series = [
@@ -411,8 +432,6 @@ def add_chart_to_sheet(sheets_authentication_response, sheet, worksheet, chart_t
411432
"title": complete_chart_args["y_axis_title"],
412433
"position": "LEFT_AXIS",
413434
})
414-
print(formatted_domains)
415-
print(formatted_series)
416435
request = {
417436
"addChart": {
418437
"chart": {
@@ -432,7 +451,6 @@ def add_chart_to_sheet(sheets_authentication_response, sheet, worksheet, chart_t
432451
},
433452
},
434453
}
435-
print(request)
436454

437455
response = update_sheet_raw(sheets_authentication_response, sheet, request)
438456
return response

analytics/analytics_package/analytics/sheets_elements.py

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1+
from enum import Enum
12
import numpy as np
23
import pandas as pd
3-
from .charts import get_data_df
4+
from .charts import get_data_df, get_df_over_time
45
from .fields import *
56
from urllib.parse import urlparse
67
import datetime as dt
@@ -225,4 +226,40 @@ def get_change(series_current, series_previous, start_current, end_current, star
225226
# Adjust the values from the prior series to account for the different number of days in the month
226227
series_previous_reindexed = (series_previous.reindex(combined_index) * current_length / previous_length)
227228
change = ((series_current_reindexed / series_previous_reindexed) - 1).replace({np.inf: np.nan})
228-
return change
229+
return change
230+
231+
class ADDITIONAL_DATA_BEHAVIOR(Enum):
    """How saved data is merged with API data in get_change_over_time_df."""

    # Saved values are summed with the API values for matching rows
    ADD = "add"

    # Saved rows take precedence over API rows that share the same index label
    REPLACE = "replace"
234+
235+
def get_change_over_time_df(
    analytics_params, include_changes=True, additional_data_path=None, additional_data_behavior=None
):
    """
    Build a per-month table of "Users" and "Total Pageviews", optionally merged with saved data.

    :param analytics_params: keyword arguments forwarded unchanged to get_df_over_time
    :param include_changes: when True, add "Users Change" and "Total Pageviews Change"
        columns holding the fractional change relative to the preceding index label
    :param additional_data_path: path handed to pd.read_json; None means only API data is used.
        NOTE(review): the saved frame is presumably indexed like the API frame and has
        "Users"/"Total Pageviews" columns - confirm against the stored files
    :param additional_data_behavior: ADDITIONAL_DATA_BEHAVIOR member, required whenever
        additional_data_path is given. ADD sums saved and API values (missing cells count
        as 0); REPLACE keeps the saved row wherever both sources share an index label
    :return: the resulting DataFrame
    :raises ValueError: if additional_data_path is given without a valid additional_data_behavior
    """
    # Validate before hitting the API so a bad call fails fast, and do it with real
    # exceptions: `assert` disappears under `python -O`
    if additional_data_path is not None:
        if additional_data_behavior is None:
            raise ValueError(
                "additional_data_behavior is required when additional_data_path is provided"
            )
        if not isinstance(additional_data_behavior, ADDITIONAL_DATA_BEHAVIOR):
            # Previously an unknown value silently produced an empty DataFrame
            raise ValueError(
                "additional_data_behavior must be a member of ADDITIONAL_DATA_BEHAVIOR, got "
                + repr(additional_data_behavior)
            )

    df_api = get_df_over_time(
        ["Users", "Total Pageviews"],
        ["activeUsers", "screenPageViews"],
        "yearMonth",
        sort_results=["yearMonth"],
        # Append "01" to each yearMonth label, drop the last row and reverse the order
        # (presumably the last row is the still-incomplete current month - confirm)
        df_processor=(lambda df: df.set_index(df.index + "01")[-2::-1]),
        format_table=False,
        **analytics_params
    )

    if additional_data_path is None:
        df_combined = df_api
    else:
        df_saved = pd.read_json(additional_data_path)
        if additional_data_behavior == ADDITIONAL_DATA_BEHAVIOR.ADD:
            df_combined = df_api.add(df_saved.astype(int), fill_value=0)[::-1]
        else:
            # REPLACE: saved rows come first, so keep="first" prefers them over API rows
            df_combined = pd.concat([df_saved, df_api], ignore_index=False)
            df_combined = df_combined.loc[
                ~df_combined.index.duplicated(keep="first")
            ].sort_index(ascending=False)

    if include_changes:
        # The rows may be ordered newest-first, in which case a plain pct_change() would
        # compare each month with the FOLLOWING one. Compute the change on an
        # ascending-sorted copy instead; assigning the Series back aligns it on the
        # index, so the row order of df_combined is left untouched.
        for column in ("Users", "Total Pageviews"):
            df_combined[column + " Change"] = df_combined[column].sort_index().pct_change()

    return df_combined

0 commit comments

Comments
 (0)