Skip to content

Commit b648f9e

Browse files
author
Jonah Paten
authored
feat: added anvil catalog analytics reports (#4372) (#4379)
1 parent 3141f3e commit b648f9e

File tree

5 files changed

+194
-10
lines changed

5 files changed

+194
-10
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# CHANGE THESE VALUES TO GENERATE NEW REPORTS
2+
# The start and end dates of the current month (yyyy-mm-dd)
3+
START_DATE_CURRENT = "2025-01-01"
4+
END_DATE_CURRENT = "2025-01-31"
5+
# The start and end dates of the prior month (yyyy-mm-dd)
6+
START_DATE_PRIOR = "2024-12-01"
7+
END_DATE_PRIOR = "2024-12-31"
8+
# The name of the folder in which to save the report
9+
PARENT_FOLDER_NAME = "January 2025"
10+
11+
# The name of the spreadsheet with the report
12+
SHEET_NAME = "AnVIL Catalog"
13+
14+
ANVIL_CATALOG_ID = "368661710"
15+
# Name of the environment variable (set in the notebook's first cell) that holds the path to the GA4 credentials file
16+
SECRET_NAME = "ANVIL_ANALYTICS_REPORTING_CLIENT_SECRET_PATH"
17+
ANALYTICS_START = "2023-05-01"
18+
HISTORIC_UA_DATA_PATH = "users_over_time_history.json"
19+
20+
OAUTH_PORT = 8082
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": null,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"# Update this line to the path of your ga4 credentials. Make sure these are never stored in a version controlled folder.\n",
10+
"%env ANVIL_ANALYTICS_REPORTING_CLIENT_SECRET_PATH=../../../do_not_commit_ga4_credentials.json"
11+
]
12+
},
13+
{
14+
"cell_type": "code",
15+
"execution_count": null,
16+
"metadata": {},
17+
"outputs": [],
18+
"source": [
19+
"from analytics import sheets_api as sheets\n",
20+
"from analytics import sheets_elements as elements\n",
21+
"from analytics import api as ga\n",
22+
"import pandas as pd\n",
23+
"from constants import *\n",
24+
"import gspread"
25+
]
26+
},
27+
{
28+
"cell_type": "code",
29+
"execution_count": null,
30+
"metadata": {},
31+
"outputs": [],
32+
"source": [
33+
"ga_authentication, drive_authentication, sheets_authentication = ga.authenticate(\n",
34+
" SECRET_NAME,\n",
35+
" ga.ga4_service_params,\n",
36+
" ga.drive_service_params,\n",
37+
" ga.sheets_service_params,\n",
38+
" port=OAUTH_PORT\n",
39+
")\n",
40+
"\n",
41+
"date_string = f\"{START_DATE_CURRENT} - {END_DATE_CURRENT}\"\n",
42+
"\n",
43+
"default_params = {\n",
44+
" \"service_system\": ga_authentication,\n",
45+
" \"start_date\": START_DATE_CURRENT,\n",
46+
" \"end_date\": END_DATE_CURRENT,\n",
47+
"}\n",
48+
"\n",
49+
"anvil_catalog_params = {\n",
50+
" **default_params,\n",
51+
" \"property\": ANVIL_CATALOG_ID,\n",
52+
"}\n",
53+
"\n",
54+
"anvil_catalog_params_all_time = {\n",
55+
" **anvil_catalog_params,\n",
56+
" \"start_date\": ANALYTICS_START,\n",
57+
"}"
58+
]
59+
},
60+
{
61+
"cell_type": "code",
62+
"execution_count": null,
63+
"metadata": {},
64+
"outputs": [],
65+
"source": [
66+
"df_monthly_pageviews = elements.get_page_views_over_time_df(anvil_catalog_params_all_time, additional_data_path=HISTORIC_UA_DATA_PATH, additional_data_behavior=elements.ADDITIONAL_DATA_BEHAVIOR.ADD)\n",
67+
"df_pageviews = elements.get_page_views_change(anvil_catalog_params, START_DATE_CURRENT, END_DATE_CURRENT, START_DATE_PRIOR, END_DATE_PRIOR)\n",
68+
"df_outbound = elements.get_outbound_links_change(anvil_catalog_params, START_DATE_CURRENT, END_DATE_CURRENT, START_DATE_PRIOR, END_DATE_PRIOR)"
69+
]
70+
},
71+
{
72+
"cell_type": "code",
73+
"execution_count": null,
74+
"metadata": {},
75+
"outputs": [],
76+
"source": [
77+
"dict_spreadsheet = {\n",
78+
" \"Monthly Traffic Summary\": df_monthly_pageviews,\n",
79+
" \"Pageviews\": df_pageviews,\n",
80+
" \"Outbound Links\": df_outbound,\n",
81+
"}\n",
82+
"sheet = sheets.create_sheet_in_folder(\n",
83+
" drive_authentication,\n",
84+
" SHEET_NAME,\n",
85+
" PARENT_FOLDER_NAME,\n",
86+
" override_behavior=sheets.FILE_OVERRIDE_BEHAVIORS.OVERRIDE_IF_IN_SAME_PLACE\n",
87+
" )\n",
88+
"sheets.fill_spreadsheet_with_df_dict(\n",
89+
" sheet,\n",
90+
" dict_spreadsheet,\n",
91+
" sheets.FILE_OVERRIDE_BEHAVIORS.OVERRIDE_IF_IN_SAME_PLACE,\n",
92+
" column_formatting_options={\n",
93+
" \"Monthly Traffic Summary\": {\n",
94+
" \"Month\": sheets.COLUMN_FORMAT_OPTIONS.YEAR_MONTH_DATE,\n",
95+
" \"Users Change\": sheets.COLUMN_FORMAT_OPTIONS.PERCENT_COLORED,\n",
96+
" \"Total Pageviews Change\": sheets.COLUMN_FORMAT_OPTIONS.PERCENT_COLORED,\n",
97+
" },\n",
98+
" \"Outbound Links\": {\n",
99+
" \"Total Clicks Percent Change\": sheets.COLUMN_FORMAT_OPTIONS.PERCENT_COLORED,\n",
100+
" \"Total Users Percent Change\": sheets.COLUMN_FORMAT_OPTIONS.PERCENT_COLORED,\n",
101+
" },\n",
102+
" \"Pageviews\": {\n",
103+
" \"Total Views Percent Change\": sheets.COLUMN_FORMAT_OPTIONS.PERCENT_COLORED,\n",
104+
" \"Total Users Percent Change\": sheets.COLUMN_FORMAT_OPTIONS.PERCENT_COLORED,\n",
105+
" },\n",
106+
"\n",
107+
" },\n",
108+
" sheet_formatting_options={\n",
109+
" \"Monthly Traffic Summary\": {\n",
110+
" \"extra_columns\": 1,\n",
111+
" \"extra_columns_width\": 2000\n",
112+
" }\n",
113+
" }\n",
114+
")\n",
115+
"monthly_traffic_worksheet = sheet.worksheet(\"Monthly Traffic Summary\")\n",
116+
"date_range = sheets.WorksheetRange(\n",
117+
" monthly_traffic_worksheet, \n",
118+
" gspread.cell.Cell(1, 1), \n",
119+
" gspread.cell.Cell(df_monthly_pageviews.index.size + 1, 2)\n",
120+
")\n",
121+
"users_range = sheets.WorksheetRange(\n",
122+
" monthly_traffic_worksheet, \n",
123+
" gspread.cell.Cell(1, 2), \n",
124+
" gspread.cell.Cell(df_monthly_pageviews.index.size + 1, 3)\n",
125+
")\n",
126+
"pageviews_range = sheets.WorksheetRange(\n",
127+
" monthly_traffic_worksheet, \n",
128+
" gspread.cell.Cell(1, 3), \n",
129+
" gspread.cell.Cell(df_monthly_pageviews.index.size + 1, 4)\n",
130+
")\n",
131+
"sheets.add_chart_to_sheet(\n",
132+
" sheets_authentication,\n",
133+
" sheet,\n",
134+
" sheet.worksheet(\"Monthly Traffic Summary\"),\n",
135+
" sheets.CHART_TYPES.LINE,\n",
136+
" date_range,\n",
137+
" [users_range, pageviews_range],\n",
138+
" chart_position=gspread.cell.Cell(1, 6),\n",
139+
" chart_position_offset_x=75,\n",
140+
" chart_position_offset_y=75,\n",
141+
" title=\"Pageviews and Users Over Time\"\n",
142+
")"
143+
]
144+
}
145+
],
146+
"metadata": {
147+
"kernelspec": {
148+
"display_name": "venv",
149+
"language": "python",
150+
"name": "python3"
151+
},
152+
"language_info": {
153+
"codemirror_mode": {
154+
"name": "ipython",
155+
"version": 3
156+
},
157+
"file_extension": ".py",
158+
"mimetype": "text/x-python",
159+
"name": "python",
160+
"nbconvert_exporter": "python",
161+
"pygments_lexer": "ipython3",
162+
"version": "3.12.8"
163+
}
164+
},
165+
"nbformat": 4,
166+
"nbformat_minor": 4
167+
}
+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
## Generating Reports
2+
- Update `constants.py` to reflect the date ranges, folder name, and spreadsheet name you would like for the report.
3+
- Open `./generate_sheets_report.ipynb` using your favorite IDE or by running `jupyter notebook` and selecting it from the browser window that appears.
4+
- Add a path to your Google Cloud credentials in the first cell as instructed by the comments.
5+
- Run all cells in the Jupyter notebook by pressing the button with two arrows at the top. You will be prompted to log in to your Google Account, which must have access to the relevant analytics property.
6+
- Check your Google Drive to ensure that the desired spreadsheet is present.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"Users":{"1680307200000":241,"1677628800000":252,"1675209600000":323,"1672531200000":252,"1669852800000":169,"1667260800000":296,"1664582400000":350,"1661990400000":228,"1659312000000":279,"1656633600000":220,"1654041600000":251,"1651363200000":263,"1648771200000":295,"1646092800000":310,"1643673600000":343,"1640995200000":431,"1638316800000":212,"1635724800000":237,"1633046400000":243,"1630454400000":221,"1627776000000":204,"1625097600000":238,"1622505600000":230,"1619827200000":224,"1617235200000":135,"1614556800000":0},"Total Pageviews":{"1680307200000":1038,"1677628800000":1268,"1675209600000":1694,"1672531200000":1153,"1669852800000":552,"1667260800000":1364,"1664582400000":1567,"1661990400000":1301,"1659312000000":1469,"1656633600000":1182,"1654041600000":1422,"1651363200000":2015,"1648771200000":2302,"1646092800000":2222,"1643673600000":2311,"1640995200000":2734,"1638316800000":1510,"1635724800000":1704,"1633046400000":1734,"1630454400000":1986,"1627776000000":1683,"1625097600000":1641,"1622505600000":1248,"1619827200000":952,"1617235200000":509,"1614556800000":0}}

analytics/hca-explorer-sheets/readme.md

-10
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,3 @@
1-
## Installing the environment
2-
- Use Python 3.12.4
3-
- Run `python -m venv ./venv` to create a new environment under `./venv`.
4-
- Run `source ./venv/bin/activate` to activate the environment.
5-
- Run `pip install -r ./requirements.txt` to install requirements.
6-
7-
## Deactivating/reactivating
8-
- To deactivate the environment, run `deactivate`.
9-
- To activate the environment again, run `source ./venv/bin/activate`.
10-
111
## Generating Reports
122
- Update `constants.py` to reflect the date ranges and file name you would like for the report
133
- Open `./generate_sheets_report.ipynb` using your favorite IDE or by running `jupyter notebook` and selecting it from the browser window that appears.

0 commit comments

Comments
 (0)