Fix pt2 dashboard passrate calculation regression (#6170)

huydhn · web-flow · commit 21cb495479c0 · 2025-01-15T09:16:47.000-08:00
The regression comes from #6006, where I changed `accuracy_results LEFT JOIN performance_results` to `performance_results LEFT JOIN accuracy_results` to accommodate the Apple MPS eager benchmark. The swapped join wrongly returned `model_fail_to_load` and `eager_fail_to_run` models, which shouldn't be included in the pass rate calculation because those failures are not caused by pt2. ### Testing https://torchci-git-fork-huydhn-fix-pt2-dashboard-p-e92d66-fbopensource.vercel.app/benchmark/compilers?dashboard=torchinductor&startTime=Fri%2C%2019%20Jul%202024%2000%3A29%3A48%20GMT&stopTime=Wed%2C%2015%20Jan%202025%2001%3A29%3A48%20GMT&granularity=week&mode=inference&dtype=bfloat16&deviceName=cuda%20(a100)&lBranch=main&lCommit=1dab79470dbecef79ba4c7d4308d8a181091e58e&rBranch=main&rCommit=b732b52f1e4378f8486ceb5e7026be3321c2651c * Before https://hud.pytorch.org/benchmark/torchbench/inductor_with_cudagraphs?dashboard=torchinductor&startTime=Thu%2C%2018%20Jul%202024%2023%3A36%3A11%20GMT&stopTime=Wed%2C%2015%20Jan%202025%2000%3A36%3A11%20GMT&granularity=week&mode=inference&dtype=bfloat16&deviceName=cuda%20(a100)&lBranch=main&lCommit=b732b52f1e4378f8486ceb5e7026be3321c2651c&rBranch=main&rCommit=b732b52f1e4378f8486ceb5e7026be3321c2651c * After https://torchci-git-fork-huydhn-fix-pt2-dashboard-p-e92d66-fbopensource.vercel.app/benchmark/torchbench/inductor_with_cudagraphs?dashboard=torchinductor&startTime=Thu%2C%2018%20Jul%202024%2023%3A36%3A11%20GMT&stopTime=Wed%2C%2015%20Jan%202025%2000%3A36%3A11%20GMT&granularity=week&mode=inference&dtype=bfloat16&deviceName=cuda%20(a100)&lBranch=main&lCommit=b732b52f1e4378f8486ceb5e7026be3321c2651c&rBranch=main&rCommit=b732b52f1e4378f8486ceb5e7026be3321c2651chttps%3A%2F%2Ftorchci-git-fork-huydhn-fix-pt2-dashboard-p-e92d66-fbopensource.vercel.app%2F
diff --git a/torchci/clickhouse_queries/compilers_benchmark_performance/query.sql b/torchci/clickhouse_queries/compilers_benchmark_performance/query.sql
@@ -83,10 +83,8 @@ accuracy_results AS (
             workflow_id = { workflowId: Int64 }
             OR { workflowId: Int64 } = 0
         )
-        AND accuracy != 'model_fail_to_load'
-        AND accuracy != 'eager_fail_to_run'
 ),
-results AS (
+performance_join_accuracy_results AS (
     SELECT
         performance_results.workflow_id AS workflow_id,
         performance_results.job_id AS job_id,
@@ -144,6 +142,62 @@ results AS (
         LEFT JOIN accuracy_results ON performance_results.name = accuracy_results.name
         AND performance_results.replaced_filename = accuracy_results.replaced_filename
         AND performance_results.workflow_id = accuracy_results.workflow_id
+    WHERE
+        accuracy != 'model_fail_to_load'
+        AND accuracy != 'eager_fail_to_run'
+),
+-- Accuracy-only rows: keeps accuracy results that have no performance counterpart (i.e. export); performance metrics are emitted as 0.0 placeholders
+accuracy_join_performance_results AS (
+    SELECT
+        accuracy_results.workflow_id AS workflow_id,
+        accuracy_results.job_id AS job_id,
+        CASE -- suite is derived from the suffix of replaced_filename
+            WHEN accuracy_results.replaced_filename LIKE '%_torchbench' THEN 'torchbench'
+            WHEN accuracy_results.replaced_filename LIKE '%_timm_models' THEN 'timm_models'
+            WHEN accuracy_results.replaced_filename LIKE '%_huggingface' THEN 'huggingface'
+            ELSE ''
+        END AS suite,
+        CASE -- compiler is replaced_filename with the suite suffix removed
+            WHEN accuracy_results.replaced_filename LIKE '%_torchbench' THEN REPLACE(
+                accuracy_results.replaced_filename,
+                '_torchbench',
+                ''
+            )
+            WHEN accuracy_results.replaced_filename LIKE '%_timm_models' THEN REPLACE(
+                accuracy_results.replaced_filename,
+                '_timm_models',
+                ''
+            )
+            WHEN accuracy_results.replaced_filename LIKE '%_huggingface' THEN REPLACE(
+                accuracy_results.replaced_filename,
+                '_huggingface',
+                ''
+            )
+            ELSE ''
+        END AS compiler,
+        accuracy_results.name,
+        0.0 AS speedup, -- placeholder: these rows have no performance measurement
+        accuracy,
+        0.0 AS compilation_latency, -- placeholder, as above
+        0.0 AS compression_ratio, -- placeholder, as above
+        0.0 AS abs_latency, -- placeholder, as above
+        0.0 AS dynamo_peak_mem, -- placeholder, as above
+        0.0 AS eager_peak_mem, -- placeholder, as above
+        accuracy_results.timestamp AS timestamp
+    FROM
+        accuracy_results
+        LEFT JOIN performance_results ON performance_results.name = accuracy_results.name
+        AND performance_results.replaced_filename = accuracy_results.replaced_filename
+        AND performance_results.workflow_id = accuracy_results.workflow_id
+    WHERE
+        performance_results.name = '' -- NOTE(review): anti-join; assumes ClickHouse fills unmatched LEFT JOIN columns with defaults ('' here, i.e. join_use_nulls = 0) - confirm
+        AND accuracy != 'model_fail_to_load' -- excluded from the pass rate: not a pt2 failure
+        AND accuracy != 'eager_fail_to_run' -- excluded from the pass rate: not a pt2 failure
+),
+results AS ( -- performance rows (with optional accuracy) plus accuracy-only rows
+    SELECT * FROM performance_join_accuracy_results
+    UNION ALL -- NOTE(review): SELECT * pairs columns by position; both CTEs must keep the same column order
+    SELECT * FROM accuracy_join_performance_results
 )
 SELECT
     DISTINCT results.workflow_id,