Skip to content

Commit c3d6400

Browse files
committed
wip: initial commit
1 parent efe4bda commit c3d6400

File tree

1 file changed

+359
-0
lines changed

1 file changed

+359
-0
lines changed

portal/metrics.md

+359
Original file line numberDiff line numberDiff line change
@@ -1 +1,360 @@
1+
---
2+
kernelspec:
3+
name: python3
4+
display_name: Python 3
5+
---
6+
7+
```{code-cell} python3
8+
:tags: [remove-cell]
9+
10+
import datetime
11+
import json
12+
import os
13+
14+
import cartopy
15+
import google
16+
import matplotlib
17+
import matplotlib.cm as cm
18+
import matplotlib.colors as colors
19+
import matplotlib.pyplot as plt
20+
import numpy as np
21+
from google.analytics.data_v1beta import BetaAnalyticsDataClient
22+
from google.analytics.data_v1beta.types import DateRange, Dimension, Metric, RunReportRequest
23+
24+
# Project ID Numbers (Google Analytics property IDs, interpolated into 'properties/<id>')
PORTAL_ID = '266784902'
FOUNDATIONS_ID = '281776420'
COOKBOOKS_ID = '324070631'

# Access Secrets
PRIVATE_KEY_ID = os.environ.get('PRIVATE_KEY_ID')
# Ensure GH secrets doesn't introduce extra '\' new line characters (related to '\' being an escape character).
# Default to '' so a missing secret reaches the "length 0" diagnostic below
# instead of raising AttributeError on None here.
PRIVATE_KEY = os.environ.get('PRIVATE_KEY', '').replace('\\n', '\n')

credentials_dict = {
    'type': 'service_account',
    'project_id': 'cisl-vast-pythia',
    'private_key_id': PRIVATE_KEY_ID,
    'private_key': PRIVATE_KEY,
    # Same service-account address that is URL-encoded in 'client_x509_cert_url' below
    'client_email': 'pythia-metrics-api@cisl-vast-pythia.iam.gserviceaccount.com',
    'client_id': '113402578114110723940',
    'auth_uri': 'https://accounts.google.com/o/oauth2/auth',
    'token_uri': 'https://oauth2.googleapis.com/token',
    'auth_provider_x509_cert_url': 'https://www.googleapis.com/oauth2/v1/certs',
    'client_x509_cert_url': 'https://www.googleapis.com/robot/v1/metadata/x509/pythia-metrics-api%40cisl-vast-pythia.iam.gserviceaccount.com',
    'universe_domain': 'googleapis.com',
}

try:
    client = BetaAnalyticsDataClient.from_service_account_info(credentials_dict)
except google.auth.exceptions.MalformedError as e:
    print('Malformed Error:', repr(e))
    # Insight into reason for failure without exposing secret key
    # 0: Secret not found, else malformed
    # 706: extra quote, 732: extra '\', 734: both
    print('Length of PRIVATE_KEY:', len(PRIVATE_KEY))

# Start date used by every "all-time" report in this file
pre_project_date = '2020-03-31'
58+
```
59+
60+
# Metrics
61+
62+
Last Updated: {eval}`str(datetime.datetime.now())`
63+
64+
This metrics page provides an overview of user activity collected by Google Analytics across the three pillars of Project Pythia: our portal, which includes information about the project as well as our resource gallery; our Foundations book; and our Cookbooks gallery. Information is either all-time (from a pre-project start date of March 2020) or year-to-date, as indicated, and is updated nightly to provide current, automated insights into our engagement, impact, and audience reach. If you would like to request a different metrics analysis, timeframe, or view, please [open a GitHub issue](https://github.com/ProjectPythia/projectpythia.github.io/issues/new/choose).
65+
66+
## Table of Total Active Users by Project
67+
68+
```{code-cell} python3
69+
:tags: [remove-cell]
70+
71+
def _format_rounding(value):
72+
"""
73+
Helper function for rounding string displays. 1,232 -> 1.2K
74+
"""
75+
return f'{round(value / 1000, 1):.1f}K'
76+
77+
78+
# The rest of this file alternates between functions for requesting information from Google Analytics
79+
# and functions that use the requested information to form either a .json or a .png file to be used in write-metrics-md.py
80+
def _run_total_users_report(property_id):
    """
    Request the cumulative active-user count for one property, from
    pre_project_date through today, and return it as a rounded display string.
    """
    report = client.run_report(
        RunReportRequest(
            property=f'properties/{property_id}',
            dimensions=[],
            metrics=[Metric(name='activeUsers')],
            date_ranges=[DateRange(start_date=pre_project_date, end_date='today')],
        )
    )

    # No dimensions are requested; add up the first metric of every row returned.
    user_total = sum(int(entry.metric_values[0].value) for entry in report.rows)
    return _format_rounding(user_total)
97+
```
98+
99+
This table displays the total active users of our 3 Pythia projects over the life of Project Pythia. Google Analytics defines active users as the number of unique people who have visited the site and met certain [engagement requirements](https://support.google.com/analytics/answer/9234069?sjid=8697784525616937194-NC). You can read more in the [GA4 "Understand User Metrics" documentation](https://support.google.com/analytics/answer/12253918?hl=en).
100+
101+
```{code-cell} python3
102+
:tags: [remove-cell]
103+
104+
# One all-time total per project, requested in the order Portal, Foundations, Cookbooks;
# the three names are read by the {eval} roles in the table that follows.
portal_users, foundations_users, cookbooks_users = (
    _run_total_users_report(project_id) for project_id in (PORTAL_ID, FOUNDATIONS_ID, COOKBOOKS_ID)
)
107+
```
108+
109+
| Project | All-Time Users |
110+
| ----------- | ------------------------- |
111+
| Portal | {eval}`portal_users` |
112+
| Foundations | {eval}`foundations_users` |
113+
| Cookbooks | {eval}`cookbooks_users` |
114+
115+
## Chart of Active Users by Project Since Year Start
116+
```{code-cell} python3
117+
:tags: [remove-cell]
118+
119+
def _run_active_users_this_year(property_id):
    """
    Function for requesting active users by day from a project since year start.

    Returns a pair ``(dates, user_counts)`` of equal-length tuples sorted
    chronologically. Both tuples are empty when the report contains no rows.
    """
    current_year = datetime.datetime.now().year
    start_date = f'{current_year}-01-01'

    request = RunReportRequest(
        property=f'properties/{property_id}',
        dimensions=[Dimension(name='date')],
        metrics=[Metric(name='activeUsers')],
        date_ranges=[DateRange(start_date=start_date, end_date='today')],
    )
    response = client.run_report(request)

    # The 'date' dimension value is parsed as YYYYMMDD
    daily_counts = [
        (datetime.datetime.strptime(row.dimension_values[0].value, '%Y%m%d'), int(row.metric_values[0].value))
        for row in response.rows
    ]
    if not daily_counts:
        # zip(*[]) yields nothing, which breaks callers that unpack two sequences
        return (), ()

    # Days need to be sorted chronologically
    daily_counts.sort(key=lambda pair: pair[0])
    dates, user_counts = zip(*daily_counts)
    return dates, user_counts
144+
145+
146+
def plot_projects_this_year(PORTAL_ID, FOUNDATIONS_ID, COOKBOOKS_ID):
    """
    Draw one year-to-date daily active-user line per project and save the figure as a .png.
    """
    # (legend label, line color, property ID), in the order the lines are drawn
    series = (
        ('Portal', 'purple', PORTAL_ID),
        ('Foundations', 'royalblue', FOUNDATIONS_ID),
        ('Cookbooks', 'indianred', COOKBOOKS_ID),
    )

    # Fetch every report before any figure is created
    fetched = []
    for label, shade, project_id in series:
        days, counts = _run_active_users_this_year(project_id)
        fetched.append((label, shade, days, counts))

    # Plotting code
    plt.figure(figsize=(10, 5.5))
    plt.title('Year-to-Date Pythia Active Users', fontsize=15)

    for label, shade, days, counts in fetched:
        plt.plot(days, counts, color=shade, label=label)

    plt.legend(fontsize=12, loc='upper right')

    plt.xlabel('Date', fontsize=12)
    plt.savefig('portal/metrics/thisyear.png', bbox_inches='tight')
166+
167+
```
168+
This line plot displays active users for our 3 Pythia projects (Portal in purple, Foundations in blue, and Cookbooks in salmon) since January 1st of the current year.
169+
170+
```{code-cell} python3
171+
:tags: [remove-input]
172+
:caption: Chart of active users by project since year start.
173+
174+
# Build the year-to-date chart; the call also saves it to portal/metrics/thisyear.png.
plot_projects_this_year(PORTAL_ID, FOUNDATIONS_ID, COOKBOOKS_ID)
175+
```
176+
177+
## Chart of Top 5 Pages by Project
178+
```{code-cell} python3
179+
:tags: [remove-cell]
180+
181+
def _run_top_pages_report(property_id):
    """
    Request page views by page title for one property and return the five most viewed.

    Returns ``(titles, view_counts)`` as two lists ordered from least to most viewed.
    """
    report = client.run_report(
        RunReportRequest(
            property=f'properties/{property_id}',
            dimensions=[Dimension(name='pageTitle')],
            date_ranges=[DateRange(start_date=pre_project_date, end_date='today')],
            metrics=[Metric(name='screenPageViews')],
        )
    )

    views_by_title = {
        entry.dimension_values[0].value: int(entry.metric_values[0].value) for entry in report.rows
    }

    # Rank by view count, keep the five largest, then flip the order so the
    # most visited page comes last (and therefore plots on top).
    ranked = sorted(views_by_title.items(), key=lambda pair: pair[1], reverse=True)
    top_five = ranked[:5][::-1]

    # Keep only the text before the first em dash of each page title
    titles = [title.split('—')[0] for title, _ in top_five]
    counts = [count for _, count in top_five]
    return titles, counts
207+
def plot_top_pages(PORTAL_ID, FOUNDATIONS_ID, COOKBOOKS_ID):
    """
    Function that takes the top viewed pages (up to 5 per project) for all 3 projects
    and plots them on a horizontal bar chart, saved as a .png.
    """
    portal_pages, portal_views = _run_top_pages_report(PORTAL_ID)
    foundations_pages, foundations_views = _run_top_pages_report(FOUNDATIONS_ID)
    cookbooks_pages, cookbooks_views = _run_top_pages_report(COOKBOOKS_ID)

    # Plotting code
    _, ax = plt.subplots(figsize=(10, 5.5))
    plt.title('All-Time Top Pages', fontsize=15)

    # Bar positions follow the number of pages each report actually returned, with one
    # empty slot between projects. With five pages each this gives
    # 0-4 for Cookbooks, 6-10 for Foundations and 12-16 for Portal.
    y = np.arange(len(cookbooks_views))
    y2 = np.arange(len(foundations_views)) + len(y) + 1
    y3 = np.arange(len(portal_views)) + len(y) + len(y2) + 2

    bar1 = ax.barh(y3, portal_views, align='center', label='Portal', color='purple')
    bar2 = ax.barh(y2, foundations_views, align='center', label='Foundations', color='royalblue')
    bar3 = ax.barh(y, cookbooks_views, align='center', label='Cookbooks', color='indianred')

    # Tick positions and labels in the same bottom-to-top order: Cookbooks, Foundations, Portal
    y4 = np.concatenate([y, y2, y3])
    pages = cookbooks_pages + foundations_pages + portal_pages  # List of all pages
    ax.set_yticks(y4, labels=pages, fontsize=12)

    # Adds round-formatted views label to end of each bar
    for bars in (bar1, bar2, bar3):
        ax.bar_label(bars, fmt=_format_rounding, padding=5, fontsize=10)

    ax.set_xscale('log')
    ax.set_xlim([10, 10**5])  # set_xlim must be after setting xscale to log
    ax.set_xlabel('Page Views', fontsize=12)

    plt.legend(fontsize=12, loc='lower right')
    plt.savefig('portal/metrics/toppages.png', bbox_inches='tight')
243+
```
244+
This bar-chart displays the top 5 pages by project over the life of Project Pythia, as determined by screen page views. Screen page views refers to the number of times users viewed a page, including repeated visits. To learn more visit the [GA4 "API Dimensions & Metrics" page](https://developers.google.com/analytics/devguides/reporting/data/v1/api-schema).
245+
246+
```{code-cell} python3
247+
:tags: [remove-input]
248+
:caption: Bar chart of the top five pages by project over the life of Project Pythia
249+
250+
# Build the top-pages bar chart; the call also saves it to portal/metrics/toppages.png.
plot_top_pages(PORTAL_ID, FOUNDATIONS_ID, COOKBOOKS_ID)
251+
```
252+
253+
## Map of Total Foundations Active Users by Country
254+
```{code-cell} python3
255+
:tags: [remove-cell]
256+
257+
def _run_usersXcountry_report(property_id):
    """
    Request active users broken down by country for one property, from
    pre_project_date through today.

    Returns a dict mapping each country name to its summed active-user count.
    """
    report = client.run_report(
        RunReportRequest(
            property=f'properties/{property_id}',
            dimensions=[Dimension(name='country')],
            metrics=[Metric(name='activeUsers')],
            date_ranges=[DateRange(start_date=pre_project_date, end_date='today')],
        )
    )

    totals = {}
    for entry in report.rows:
        name, count = entry.dimension_values[0].value, int(entry.metric_values[0].value)
        # Accumulate rather than overwrite in case a country appears in more than one row
        if name in totals:
            totals[name] += count
        else:
            totals[name] = count

    return totals
276+
def plot_usersXcountry(FOUNDATIONS_ID):
    """
    Function for taking users by country for Pythia Foundations and plotting them on a map.

    Countries are shaded on a log scale by user count, countries without users are drawn
    in grey, and a text box lists the ten countries with the most users. The figure is
    saved as a .png.
    """
    users_by_country = _run_usersXcountry_report(FOUNDATIONS_ID)

    # Google API Country names do not match Cartopy Country Shapefile names
    dict_api2cartopy = {
        'Tanzania': 'United Republic of Tanzania',
        'United States': 'United States of America',
        'Congo - Kinshasa': 'Democratic Republic of the Congo',
        'Bahamas': 'The Bahamas',
        'Timor-Leste': 'East Timor',
        'C\u00f4te d\u2019Ivoire': 'Ivory Coast',
        'Bosnia & Herzegovina': 'Bosnia and Herzegovina',
        'Serbia': 'Republic of Serbia',
        'Trinidad & Tobago': 'Trinidad and Tobago',
    }

    # Rename only the countries that are present in the report (a country with no users
    # has no entry to pop), and add to any count already stored under the shapefile name.
    for api_name, cartopy_name in dict_api2cartopy.items():
        if api_name in users_by_country:
            users_by_country[cartopy_name] = users_by_country.get(cartopy_name, 0) + users_by_country.pop(api_name)

    # Sort by users and grab the top 10 countries for a text box
    top_10_countries = sorted(users_by_country.items(), key=lambda item: item[1], reverse=True)[:10]
    top_10_text = '\n'.join(f'{country}: {_format_rounding(value)}' for country, value in top_10_countries)

    # Plotting code
    fig = plt.figure(figsize=(10, 4))
    ax = plt.axes(projection=cartopy.crs.PlateCarree(), frameon=False)
    ax.set_title('All-Time Pythia Foundations Users by Country', fontsize=15)

    shapefile = cartopy.io.shapereader.natural_earth(category='cultural', resolution='110m', name='admin_0_countries')
    reader = cartopy.io.shapereader.Reader(shapefile)
    countries = reader.records()

    colormap = plt.get_cmap('Blues')
    newcmp = colors.ListedColormap(colormap(np.linspace(0.2, 1, 128)))  # Truncate colormap to remove white hues
    newcmp.set_extremes(under='grey')

    norm = colors.LogNorm(vmin=1, vmax=max(users_by_country.values()))  # Plot on log scale
    mappable = cm.ScalarMappable(norm=norm, cmap=newcmp)

    # Loop through countries and plot their color
    for country in countries:
        # Shapes are matched to the report by their 'SOVEREIGNT' attribute
        country_name = country.attributes['SOVEREIGNT']
        if country_name in users_by_country:
            facecolor = newcmp(norm(users_by_country[country_name]))
            ax.add_geometries(
                [country.geometry],
                cartopy.crs.PlateCarree(),
                facecolor=facecolor,
                edgecolor='white',
                linewidth=0.7,
                norm=matplotlib.colors.LogNorm(),
            )
        else:
            ax.add_geometries(
                [country.geometry], cartopy.crs.PlateCarree(), facecolor='grey', edgecolor='white', linewidth=0.7
            )

    # Add colorbar
    cax = fig.add_axes([0.05, -0.015, 0.7, 0.03])  # [x0, y0, width, height]
    cbar = fig.colorbar(mappable=mappable, cax=cax, spacing='uniform', orientation='horizontal', extend='min')
    cbar.set_label('Unique Users')

    # Add top 10 countries text
    props = dict(boxstyle='round', facecolor='white', edgecolor='white')
    ax.text(1.01, 0.5, top_10_text, transform=ax.transAxes, fontsize=12, verticalalignment='center', bbox=props)

    plt.tight_layout()
    plt.savefig('portal/metrics/bycountry.png', bbox_inches='tight')
351+
```
352+
This map displays the number of active users per country for Pythia Foundations for the entire life of Project Pythia.
353+
354+
```{code-cell} python3
355+
:tags: [remove-input]
356+
:caption: Map of the number of active users per country for Pythia Foundations for the entire life of Project Pythia.
357+
358+
# Build the users-by-country map; the call also saves it to portal/metrics/bycountry.png.
plot_usersXcountry(FOUNDATIONS_ID)
359+
```
360+

0 commit comments

Comments
 (0)