From efc0ff86db48524aba8a67c0190880463f326570 Mon Sep 17 00:00:00 2001 From: Jonah Paten Date: Fri, 31 Jan 2025 13:28:15 -0800 Subject: [PATCH] feat: page view report for hca data explorer (#4354) * feat: created analytics sheets notebook for hca browser (#4317) * chore: updated analytics package version to 3.3.1 (#4317) --- analytics/hca-explorer-sheets/constants.py | 20 +++ .../generate_sheets_report.ipynb | 118 ++++++++++++++++++ analytics/hca-explorer-sheets/readme.md | 16 +++ analytics/requirements.txt | 49 +++++++- 4 files changed, 202 insertions(+), 1 deletion(-) create mode 100644 analytics/hca-explorer-sheets/constants.py create mode 100644 analytics/hca-explorer-sheets/generate_sheets_report.ipynb create mode 100644 analytics/hca-explorer-sheets/readme.md diff --git a/analytics/hca-explorer-sheets/constants.py b/analytics/hca-explorer-sheets/constants.py new file mode 100644 index 000000000..9512dce05 --- /dev/null +++ b/analytics/hca-explorer-sheets/constants.py @@ -0,0 +1,20 @@ +# CHANGE THESE VALUES TO GENERATE NEW REPORTS +# The start and end dates of the current month (yyyy-mm-dd) +START_DATE_CURRENT = "2024-12-01" +END_DATE_CURRENT = "2024-12-31" +# The start and end dates of the prior month (yyyy-mm-dd) +START_DATE_PRIOR = "2024-11-01" +END_DATE_PRIOR = "2024-11-30" +# The name of the folder in which to save the report +PARENT_FOLDER_NAME = "December 2024 (demos)" + +# The name of the spreadsheet with the report +SHEET_NAME = "HCA Explorer" + +HCA_PORTAL_ID = "361323030" +# Filter on the Data Explorer host name (EXACT match). NOTE(review): with no notExpression wrapper this looks like it selects, rather than excludes, Data Explorer traffic; confirm against the analytics package +HCA_BROWSER_EXCLUDE_FILTER = {"filter": {"fieldName": "hostName", "stringFilter": {"matchType": "EXACT", "value": "explore.data.humancellatlas.org"}}} +SECRET_NAME = "HCA_ANALYTICS_REPORTING_CLIENT_SECRET_PATH" +ANALYTICS_START = "2021-01-01" + +OAUTH_PORT = 8082 \ No newline at end of file diff --git a/analytics/hca-explorer-sheets/generate_sheets_report.ipynb b/analytics/hca-explorer-sheets/generate_sheets_report.ipynb new file mode 100644 index 
000000000..fdd23624f --- /dev/null +++ b/analytics/hca-explorer-sheets/generate_sheets_report.ipynb @@ -0,0 +1,118 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Update this line to the path of your ga4 credentials. Make sure these are never stored in a version controlled folder.\n", + "%env HCA_ANALYTICS_REPORTING_CLIENT_SECRET_PATH=../../../do_not_commit_ga4_credentials.json" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from analytics import sheets_api as sheets\n", + "from analytics import sheets_elements as elements\n", + "from analytics import api as ga\n", + "import pandas as pd\n", + "from constants import *" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ga_authentication, drive_authentication = ga.authenticate(\n", + " SECRET_NAME,\n", + " ga.ga4_service_params,\n", + " ga.drive_service_params,\n", + " port=OAUTH_PORT\n", + ")\n", + "\n", + "date_string = f\"{START_DATE_CURRENT} - {END_DATE_CURRENT}\"\n", + "\n", + "default_params = {\n", + " \"service_system\": ga_authentication,\n", + " \"start_date\": START_DATE_CURRENT,\n", + " \"end_date\": END_DATE_CURRENT,\n", + "}\n", + "\n", + "hca_portal_params = {\n", + " **default_params,\n", + " \"base_dimension_filter\": HCA_BROWSER_EXCLUDE_FILTER,\n", + " \"property\": HCA_PORTAL_ID,\n", + "}\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_pageviews = elements.get_page_views_change(hca_portal_params, START_DATE_CURRENT, END_DATE_CURRENT, START_DATE_PRIOR, END_DATE_PRIOR) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dict_spreadsheet = {\n", + " \"Page Views\": df_pageviews,\n", + "}\n", + "sheets.fill_spreadsheet_with_df_dict(\n", + " 
sheets.create_sheet_in_folder(\n", + " drive_authentication,\n", + " SHEET_NAME,\n", + " PARENT_FOLDER_NAME,\n", + " override_behavior=sheets.FILE_OVERRIDE_BEHAVIORS.OVERRIDE_IF_IN_SAME_PLACE\n", + " ),\n", + " dict_spreadsheet,\n", + " sheets.FILE_OVERRIDE_BEHAVIORS.OVERRIDE_IF_IN_SAME_PLACE,\n", + " column_formatting_options={\n", + " \"Outbound Links\": {\n", + " \"Total Clicks Percent Change\": sheets.COLUMN_FORMAT_OPTIONS.PERCENT_COLORED,\n", + " \"Total Users Percent Change\": sheets.COLUMN_FORMAT_OPTIONS.PERCENT_COLORED,\n", + " },\n", + " \"Page Views\": {\n", + " \"Total Views Percent Change\": sheets.COLUMN_FORMAT_OPTIONS.PERCENT_COLORED,\n", + " \"Total Users Percent Change\": sheets.COLUMN_FORMAT_OPTIONS.PERCENT_COLORED,\n", + " }\n", + "\n", + " }\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/analytics/hca-explorer-sheets/readme.md b/analytics/hca-explorer-sheets/readme.md new file mode 100644 index 000000000..ff4a1b458 --- /dev/null +++ b/analytics/hca-explorer-sheets/readme.md @@ -0,0 +1,16 @@ +## Installing the environment +- Use Python 3.12.4 +- Run `python -m venv ./venv` to create a new environment under `./venv`. +- Run `source ./venv/bin/activate` to activate the environment. +- Run `pip install -r ./requirements.txt` to install requirements. + +## Deactivating/reactivating +- To deactivate the environment, run `deactivate`. +- To activate the environment again, run `source ./venv/bin/activate`. 
+ +## Generating Reports +- Update `constants.py` to reflect the date ranges, folder name, and spreadsheet name you would like for the report. +- Open `./generate_sheets_report.ipynb` using your favorite IDE or by running `jupyter notebook` and selecting it from the browser window that appears. +- Add a path to your Google Cloud credentials in the first cell as instructed by the comments. +- Run all cells in the Jupyter notebook by pressing the button with two arrows at the top. You will be prompted to log in to your Google Account, which must have access to the relevant analytics property. +- Check your Google Drive to ensure that the desired spreadsheet is present. diff --git a/analytics/requirements.txt b/analytics/requirements.txt index fa7f71974..5d3f308a2 100644 --- a/analytics/requirements.txt +++ b/analytics/requirements.txt @@ -1,14 +1,21 @@ accessible-pygments==0.0.5 alabaster==0.7.16 --e git+https://github.com/DataBiosphere/data-browser.git@60dc83eb36468c1f8091136f8e0534b3cba81b44#egg=analytics&subdirectory=analytics/analytics_package +-e git+https://github.com/DataBiosphere/data-browser.git@e0ce2c7464107bbbc166f7e21fcc3c4426b6e553#egg=analytics&subdirectory=analytics/analytics_package +anyio==4.7.0 appdirs==1.4.4 appnope==0.1.4 +argon2-cffi==23.1.0 +argon2-cffi-bindings==21.2.0 +arrow==1.3.0 asttokens==2.4.1 +async-lru==2.0.4 attrs==24.2.0 babel==2.16.0 beautifulsoup4==4.12.3 +bleach==6.2.0 cachetools==5.5.0 certifi==2024.8.30 +cffi==1.17.1 charset-normalizer==3.3.2 click==8.1.7 comm==0.2.2 @@ -16,10 +23,12 @@ contourpy==1.3.0 cycler==0.12.1 debugpy==1.8.5 decorator==5.1.1 +defusedxml==0.7.1 docutils==0.20.1 executing==2.1.0 fastjsonschema==2.20.0 fonttools==4.53.1 +fqdn==1.5.1 google-api-core==2.19.2 google-api-python-client==2.146.0 google-auth==2.34.0 @@ -28,20 +37,37 @@ google-auth-oauthlib==1.2.1 googleapis-common-protos==1.65.0 gspread==6.1.4 gspread-formatting==1.2.0 +h11==0.14.0 +httpcore==1.0.7 httplib2==0.22.0 +httpx==0.28.1 idna==3.10 imagesize==1.4.1 
importlib_metadata==8.5.0 ipykernel==6.29.5 ipython==8.27.0 +ipywidgets==8.1.5 +isoduration==20.11.0 jedi==0.19.1 Jinja2==3.1.4 +json5==0.10.0 +jsonpointer==3.0.0 jsonschema==4.23.0 jsonschema-specifications==2023.12.1 +jupyter==1.1.1 jupyter-book==1.0.2 jupyter-cache==1.0.0 +jupyter-console==6.6.3 +jupyter-events==0.10.0 +jupyter-lsp==2.2.5 jupyter_client==8.6.3 jupyter_core==5.7.2 +jupyter_server==2.14.2 +jupyter_server_terminals==0.5.3 +jupyterlab==4.3.3 +jupyterlab_pygments==0.3.0 +jupyterlab_server==2.27.3 +jupyterlab_widgets==3.0.13 kiwisolver==1.4.7 latexcodec==3.0.0 linkify-it-py==2.0.3 @@ -51,19 +77,26 @@ matplotlib==3.9.2 matplotlib-inline==0.1.7 mdit-py-plugins==0.4.2 mdurl==0.1.2 +mistune==3.0.2 myst-nb==1.1.1 myst-parser==2.0.0 nbclient==0.10.0 +nbconvert==7.16.4 nbformat==5.10.4 nest-asyncio==1.6.0 +notebook==7.3.1 +notebook_shim==0.2.4 numpy==2.1.1 oauthlib==3.2.2 +overrides==7.7.0 packaging==24.1 pandas==2.2.2 +pandocfilters==1.5.1 parso==0.8.4 pexpect==4.9.0 pillow==10.4.0 platformdirs==4.3.6 +prometheus_client==0.21.1 prompt_toolkit==3.0.47 proto-plus==1.24.0 protobuf==5.28.1 @@ -74,22 +107,28 @@ pyasn1==0.6.1 pyasn1_modules==0.4.1 pybtex==0.24.0 pybtex-docutils==1.0.3 +pycparser==2.22 pydata-sphinx-theme==0.15.4 pyee==11.1.1 Pygments==2.18.0 pyparsing==3.1.4 pyppeteer==2.0.0 python-dateutil==2.9.0.post0 +python-json-logger==3.2.1 pytz==2024.2 PyYAML==6.0.2 pyzmq==26.2.0 referencing==0.35.1 requests==2.32.3 requests-oauthlib==2.0.0 +rfc3339-validator==0.1.4 +rfc3986-validator==0.1.1 rpds-py==0.20.0 rsa==4.9 +Send2Trash==1.8.3 setuptools==75.1.0 six==1.16.0 +sniffio==1.3.1 snowballstemmer==2.2.0 soupsieve==2.6 Sphinx==7.4.7 @@ -112,15 +151,23 @@ sphinxcontrib-serializinghtml==2.0.0 SQLAlchemy==2.0.35 stack-data==0.6.3 tabulate==0.9.0 +terminado==0.18.1 +tinycss2==1.4.0 tornado==6.4.1 tqdm==4.66.5 traitlets==5.14.3 +types-python-dateutil==2.9.0.20241206 typing_extensions==4.12.2 tzdata==2024.1 uc-micro-py==1.0.3 +uri-template==1.3.0 
uritemplate==4.1.1 urllib3==1.26.20 wcwidth==0.2.13 +webcolors==24.11.1 +webencodings==0.5.1 +websocket-client==1.8.0 websockets==10.4 wheel==0.44.0 +widgetsnbextension==4.0.13 zipp==3.20.2