Skip to content

Commit cdf8174

Browse files
Tech Report: Audit pass rates (#49)
* versions * tech filter * new table with versions * typo * versions table * fix * no retries * tech_report_* tables * clusters renamed * lint * adjust export config * fix clustering * origin renamed * deduplicated good_cwv * include minor * fix * cleanup * pattern fix * tech detections only * fix * relaxed pattern * remove similar_technologies * audits pass rates * pass_rate * fix * drop PWA
1 parent e7d41ab commit cdf8174

File tree

5 files changed

+291
-94
lines changed

5 files changed

+291
-94
lines changed

definitions/output/reports/tech_crux.js

+176-67
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,31 @@ CREATE TEMP FUNCTION IS_NON_ZERO(
3030
) RETURNS BOOL AS (
3131
good + needs_improvement + poor > 0
3232
);
33+
34+
# Returns one (category, id) pair for every Lighthouse audit the page passed.
# An audit counts as passed when its score is exactly 1; audit refs that belong
# to the 'metrics' or 'hidden' groups are skipped.
# A missing report, category list, auditRefs array or audit result yields no
# rows for that part of the report instead of raising a JavaScript error.
CREATE TEMP FUNCTION get_passed_audits(lighthouse JSON)
RETURNS ARRAY<STRUCT<
  category STRING,
  id STRING
>>
LANGUAGE js AS """
  const results = []
  const categories = lighthouse?.categories ?? {}
  const auditResults = lighthouse?.audits ?? {}
  const skippedGroups = ['metrics', 'hidden']

  for (const [category, details] of Object.entries(categories)) {
    // A category without a usable auditRefs array contributes nothing.
    const auditRefs = Array.isArray(details?.auditRefs) ? details.auditRefs : []

    for (const audit of auditRefs) {
      // Optional chaining: a ref whose audit result is absent is treated as not passed.
      const score = auditResults[audit?.id]?.score

      if (score === 1 && !skippedGroups.includes(audit.group)) {
        results.push({
          category,
          id: audit.id
        })
      }
    }
  }

  return results;
""";
3358
`).query(ctx => `
3459
WITH pages AS (
3560
SELECT
@@ -172,7 +197,6 @@ technologies AS (
172197
WHERE
173198
tech.technology IS NOT NULL
174199
175-
176200
UNION ALL
177201
178202
SELECT
@@ -183,26 +207,7 @@ technologies AS (
183207
FROM pages
184208
),
185209
186-
categories AS (
187-
SELECT
188-
tech.technology,
189-
ARRAY_TO_STRING(ARRAY_AGG(DISTINCT category IGNORE NULLS ORDER BY category), ', ') AS category
190-
FROM pages,
191-
UNNEST(technologies) AS tech,
192-
UNNEST(tech.categories) AS category
193-
GROUP BY technology
194-
195-
UNION ALL
196-
197-
SELECT
198-
'ALL' AS technology,
199-
ARRAY_TO_STRING(ARRAY_AGG(DISTINCT category IGNORE NULLS ORDER BY category), ', ') AS category
200-
FROM pages,
201-
UNNEST(technologies) AS tech,
202-
UNNEST(tech.categories) AS category
203-
),
204-
205-
lab_metrics AS (
210+
lab_data AS (
206211
SELECT
207212
client,
208213
page,
@@ -213,36 +218,168 @@ lab_metrics AS (
213218
SAFE.FLOAT64(lighthouse.categories.accessibility.score) AS accessibility,
214219
SAFE.FLOAT64(lighthouse.categories['best-practices'].score) AS best_practices,
215220
SAFE.FLOAT64(lighthouse.categories.performance.score) AS performance,
216-
SAFE.FLOAT64(lighthouse.categories.pwa.score) AS pwa,
217221
SAFE.FLOAT64(lighthouse.categories.seo.score) AS seo
218222
FROM pages
219223
),
220224
221-
lab_data AS (
225+
audits AS (
226+
SELECT DISTINCT
227+
client,
228+
root_page,
229+
technology,
230+
version,
231+
audit_category,
232+
audit_id
233+
FROM (
234+
SELECT
235+
client,
236+
page,
237+
root_page,
238+
audits.category AS audit_category,
239+
audits.id AS audit_id
240+
FROM pages
241+
INNER JOIN UNNEST(get_passed_audits(pages.lighthouse)) AS audits
242+
) AS audits_data
243+
INNER JOIN technologies
244+
USING (client, page)
245+
),
246+
247+
lab_metrics AS (
222248
SELECT
223249
client,
224250
root_page,
225251
technology,
226252
version,
227-
ANY_VALUE(category) AS category,
228253
AVG(bytesTotal) AS bytesTotal,
229254
AVG(bytesJS) AS bytesJS,
230255
AVG(bytesImg) AS bytesImg,
231256
AVG(accessibility) AS accessibility,
232257
AVG(best_practices) AS best_practices,
233258
AVG(performance) AS performance,
234-
AVG(pwa) AS pwa,
235259
AVG(seo) AS seo
236-
FROM lab_metrics
260+
FROM lab_data
237261
INNER JOIN technologies
238262
USING (client, page)
239-
INNER JOIN categories
240-
USING (technology)
241263
GROUP BY
242264
client,
243265
root_page,
244266
technology,
245267
version
268+
),
269+
270+
origins_summary AS (
271+
SELECT
272+
geo,
273+
client,
274+
rank,
275+
technology,
276+
version,
277+
COUNT(DISTINCT root_page) AS origins
278+
FROM lab_metrics
279+
INNER JOIN crux
280+
USING (client, root_page)
281+
GROUP BY
282+
geo,
283+
client,
284+
rank,
285+
technology,
286+
version
287+
288+
),
289+
290+
291+
audits_summary AS (
292+
SELECT
293+
geo,
294+
client,
295+
rank,
296+
technology,
297+
version,
298+
ARRAY_AGG(STRUCT(
299+
audit_category AS category,
300+
audit_id AS id,
301+
SAFE_DIVIDE(audits.origins, origins_summary.origins) AS pass_rate
302+
)) AS audits
303+
FROM (
304+
SELECT
305+
geo,
306+
client,
307+
rank,
308+
technology,
309+
version,
310+
audit_category,
311+
audit_id,
312+
COUNT(DISTINCT root_page) AS origins
313+
FROM audits
314+
INNER JOIN crux
315+
USING (client, root_page)
316+
GROUP BY
317+
geo,
318+
client,
319+
rank,
320+
technology,
321+
version,
322+
audit_category,
323+
audit_id
324+
) AS audits
325+
LEFT JOIN origins_summary
326+
USING (geo, client, rank, technology, version)
327+
GROUP BY
328+
geo,
329+
client,
330+
rank,
331+
technology,
332+
version
333+
),
334+
335+
other_summary AS (
336+
SELECT
337+
geo,
338+
client,
339+
rank,
340+
technology,
341+
version,
342+
343+
STRUCT(
344+
COUNTIF(good_fid) AS origins_with_good_fid,
345+
COUNTIF(good_cls) AS origins_with_good_cls,
346+
COUNTIF(good_lcp) AS origins_with_good_lcp,
347+
COUNTIF(good_fcp) AS origins_with_good_fcp,
348+
COUNTIF(good_ttfb) AS origins_with_good_ttfb,
349+
COUNTIF(good_inp) AS origins_with_good_inp,
350+
COUNTIF(any_fid) AS origins_with_any_fid,
351+
COUNTIF(any_cls) AS origins_with_any_cls,
352+
COUNTIF(any_lcp) AS origins_with_any_lcp,
353+
COUNTIF(any_fcp) AS origins_with_any_fcp,
354+
COUNTIF(any_ttfb) AS origins_with_any_ttfb,
355+
COUNTIF(any_inp) AS origins_with_any_inp,
356+
COUNTIF(good_cwv) AS origins_with_good_cwv,
357+
COUNTIF(any_lcp AND any_cls) AS origins_eligible_for_cwv,
358+
SAFE_DIVIDE(COUNTIF(good_cwv), COUNTIF(any_lcp AND any_cls)) AS pct_eligible_origins_with_good_cwv
359+
) AS crux,
360+
361+
STRUCT(
362+
SAFE_CAST(APPROX_QUANTILES(accessibility, 1000)[OFFSET(500)] AS NUMERIC) AS accessibility,
363+
SAFE_CAST(APPROX_QUANTILES(best_practices, 1000)[OFFSET(500)] AS NUMERIC) AS practices,
364+
SAFE_CAST(APPROX_QUANTILES(performance, 1000)[OFFSET(500)] AS NUMERIC) AS performance,
365+
SAFE_CAST(APPROX_QUANTILES(seo, 1000)[OFFSET(500)] AS NUMERIC) AS seo
366+
) AS median_lighthouse_score,
367+
368+
STRUCT(
369+
SAFE_CAST(APPROX_QUANTILES(bytesTotal, 1000)[OFFSET(500)] AS INT64) AS total,
370+
SAFE_CAST(APPROX_QUANTILES(bytesJS, 1000)[OFFSET(500)] AS INT64) AS js,
371+
SAFE_CAST(APPROX_QUANTILES(bytesImg, 1000)[OFFSET(500)] AS INT64) AS images
372+
) AS median_page_weight_bytes
373+
374+
FROM lab_metrics
375+
INNER JOIN crux
376+
USING (client, root_page)
377+
GROUP BY
378+
geo,
379+
client,
380+
rank,
381+
technology,
382+
version
246383
)
247384
248385
SELECT
@@ -252,44 +389,16 @@ SELECT
252389
rank,
253390
technology,
254391
version,
255-
COUNT(DISTINCT root_page) AS origins,
256-
257-
# CrUX data
258-
COUNTIF(good_fid) AS origins_with_good_fid,
259-
COUNTIF(good_cls) AS origins_with_good_cls,
260-
COUNTIF(good_lcp) AS origins_with_good_lcp,
261-
COUNTIF(good_fcp) AS origins_with_good_fcp,
262-
COUNTIF(good_ttfb) AS origins_with_good_ttfb,
263-
COUNTIF(good_inp) AS origins_with_good_inp,
264-
COUNTIF(any_fid) AS origins_with_any_fid,
265-
COUNTIF(any_cls) AS origins_with_any_cls,
266-
COUNTIF(any_lcp) AS origins_with_any_lcp,
267-
COUNTIF(any_fcp) AS origins_with_any_fcp,
268-
COUNTIF(any_ttfb) AS origins_with_any_ttfb,
269-
COUNTIF(any_inp) AS origins_with_any_inp,
270-
COUNTIF(good_cwv) AS origins_with_good_cwv,
271-
COUNTIF(any_lcp AND any_cls) AS origins_eligible_for_cwv,
272-
SAFE_DIVIDE(COUNTIF(good_cwv), COUNTIF(any_lcp AND any_cls)) AS pct_eligible_origins_with_good_cwv,
273-
274-
# Lighthouse data
275-
SAFE_CAST(APPROX_QUANTILES(accessibility, 1000)[OFFSET(500)] AS NUMERIC) AS median_lighthouse_score_accessibility,
276-
SAFE_CAST(APPROX_QUANTILES(best_practices, 1000)[OFFSET(500)] AS NUMERIC) AS median_lighthouse_score_best_practices,
277-
SAFE_CAST(APPROX_QUANTILES(performance, 1000)[OFFSET(500)] AS NUMERIC) AS median_lighthouse_score_performance,
278-
SAFE_CAST(APPROX_QUANTILES(pwa, 1000)[OFFSET(500)] AS NUMERIC) AS median_lighthouse_score_pwa,
279-
SAFE_CAST(APPROX_QUANTILES(seo, 1000)[OFFSET(500)] AS NUMERIC) AS median_lighthouse_score_seo,
280392
281-
# Page weight stats
282-
SAFE_CAST(APPROX_QUANTILES(bytesTotal, 1000)[OFFSET(500)] AS INT64) AS median_bytes_total,
283-
SAFE_CAST(APPROX_QUANTILES(bytesJS, 1000)[OFFSET(500)] AS INT64) AS median_bytes_js,
284-
SAFE_CAST(APPROX_QUANTILES(bytesImg, 1000)[OFFSET(500)] AS INT64) AS median_bytes_image
285-
286-
FROM lab_data
287-
INNER JOIN crux
288-
USING (client, root_page)
289-
GROUP BY
290-
geo,
291-
client,
292-
rank,
293-
technology,
294-
version
393+
# Metrics
394+
origins,
395+
crux,
396+
median_lighthouse_score,
397+
median_page_weight_bytes,
398+
audits
399+
FROM origins_summary
400+
LEFT JOIN other_summary
401+
USING (geo, client, rank, technology, version)
402+
LEFT JOIN audits_summary
403+
USING (geo, client, rank, technology, version)
295404
`)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
const pastMonth = constants.fnPastMonth(constants.currentMonth)
2+
3+
publish('tech_report_audits', {
4+
schema: 'reports',
5+
type: 'incremental',
6+
protected: true,
7+
bigquery: {
8+
partitionBy: 'date',
9+
clusterBy: ['rank', 'geo']
10+
},
11+
tags: ['tech_report']
12+
}).preOps(ctx => `
13+
CREATE TEMP FUNCTION GET_AUDITS(
  records ARRAY<STRUCT<
    client STRING,
    audits ARRAY<STRUCT<
      category STRING,
      id STRING,
      pass_rate FLOAT64
    >>
  >>
)
RETURNS ARRAY<STRUCT<
  category STRING,
  id STRING,
  mobile STRUCT<
    pass_rate FLOAT64
  >,
  desktop STRUCT<
    pass_rate FLOAT64
  >
>>
LANGUAGE js AS '''
  // Merges the per-client audit lists into a single entry per (category, id)
  // pair, with the pass rate split into mobile and desktop fields.
  const merged = new Map()

  for (const record of records) {
    for (const audit of record.audits) {
      // category and id together identify an audit across clients.
      const key = audit.category + '|' + audit.id

      let entry = merged.get(key)
      if (!entry) {
        // Both clients start at 0, so a client with no record for this audit stays at 0.
        entry = {
          category: audit.category,
          id: audit.id,
          mobile: { pass_rate: 0 },
          desktop: { pass_rate: 0 }
        }
        merged.set(key, entry)
      }

      // Only the two known clients are accumulated; any other value is ignored.
      if (record.client === 'mobile' || record.client === 'desktop') {
        entry[record.client].pass_rate += audit.pass_rate
      }
    }
  }

  // Map iteration keeps insertion order: entries appear in first-seen order.
  return Array.from(merged.values())
''';
66+
67+
DELETE FROM ${ctx.self()}
68+
WHERE date = '${pastMonth}';
69+
`).query(ctx => `
70+
/* {"dataform_trigger": "tech_report_complete", "date": "${pastMonth}", "name": "audits", "type": "report"} */
71+
SELECT
72+
date,
73+
geo,
74+
rank,
75+
technology,
76+
version,
77+
GET_AUDITS(ARRAY_AGG(STRUCT(
78+
client,
79+
audits
80+
))) AS audits
81+
FROM ${ctx.ref('reports', 'tech_crux')}
82+
WHERE date = '${pastMonth}'
83+
GROUP BY
84+
date,
85+
geo,
86+
rank,
87+
technology,
88+
version
89+
`)

0 commit comments

Comments
 (0)