Fix publish dir of llama tests, update xpass 8b test and 405b fp8 test failures (#580)

aviator19941 · saienduri · web-flow · commit fd15aa5a30f8 · 2024-11-22T01:26:09.000Z
Fixes the publish dir of the llama tests to `out/llm/llama/benchmark`, removes
the xfail marker from the now-passing (XPASS) 8b test
(`testBenchmark8B_f16_Non_Decomposed_Prefill`), and updates the expected
failures of the 405b fp8 tests (`testBenchmark405B_fp8_TP8_Decomposed` and
`testBenchmark405B_fp8_TP8_Non_Decomposed`).

---------

Signed-off-by: aviator19941 <avinash.sharma@amd.com>
Co-authored-by: saienduri <77521230+saienduri@users.noreply.github.com>
diff --git a/.github/workflows/ci-llama-large-tests.yaml b/.github/workflows/ci-llama-large-tests.yaml
@@ -8,6 +8,7 @@ name: Llama Benchmarking Tests
 
 on:
   workflow_dispatch:
+  pull_request:
   schedule:
     # Weekdays at 4:00 AM UTC = 9:00 PM PST.
     - cron: "0 4 * * 1-5"
@@ -76,14 +77,14 @@ jobs:
             iree-base-runtime
 
       - name: Run llama tests
-        run: pytest sharktank/tests/models/llama/benchmark_amdgpu_test.py -v -s --run-nightly-llama-tests --iree-hip-target=gfx942 --html=out/index.html
+        run: pytest sharktank/tests/models/llama/benchmark_amdgpu_test.py -v -s --run-nightly-llama-tests --iree-hip-target=gfx942 --html=out/llm/llama/benchmark/index.html
 
       - name: Deploy to GitHub Pages
         uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0
         with:
           github_token: ${{ secrets.SHARK_PLATFORM_GH_TOKEN }}
-          publish_dir: ./out/llm/llama/benchmarks
-          destination_dir: ./llm/llama/benchmarks
+          publish_dir: ./out/llm/llama/benchmark
+          destination_dir: ./llm/llama/benchmark
           keep_files: true
 
       - name: Upload llama executable files
diff --git a/sharktank/tests/models/llama/benchmark_amdgpu_test.py b/sharktank/tests/models/llama/benchmark_amdgpu_test.py
@@ -197,7 +197,6 @@ def testBenchmark8B_f16_Decomposed(self):
         )
 
     @skipif_run_quick_llama_test
-    @pytest.mark.xfail(reason="Compile Error", strict=True, raises=IreeCompileException)
     def testBenchmark8B_f16_Non_Decomposed_Prefill(self):
         output_file_name = self.dir_path_8b / "f16_torch_prefill"
         output_mlir = self.llama8b_f16_torch_sdpa_artifacts.create_file(
@@ -780,7 +779,9 @@ def testBenchmark405B_f16_TP8_Decomposed(self):
             cwd=self.repo_root,
         )
 
-    @pytest.mark.xfail(reason="Compile Error", strict=True, raises=IreeCompileException)
+    @pytest.mark.xfail(
+        reason="Benchmarking Error", strict=True, raises=IreeBenchmarkException
+    )
     def testBenchmark405B_f16_TP8_Non_Decomposed(self):
         output_file_name = self.dir_path_405b / "f16_torch"
         output_mlir = self.llama405b_f16_torch_sdpa_artifacts.create_file(
@@ -828,7 +829,9 @@ def testBenchmark405B_f16_TP8_Non_Decomposed(self):
             cwd=self.repo_root,
         )
 
-    @pytest.mark.xfail(reason="Compile Error", strict=True, raises=IreeCompileException)
+    @pytest.mark.xfail(
+        reason="KeyError in theta.py", strict=True, raises=ExportMlirException
+    )
     def testBenchmark405B_fp8_TP8_Decomposed(self):
         output_file_name = self.dir_path_405b / "fp8_decomposed"
         output_mlir = self.llama405b_fp8_decomposed_artifacts.create_file(
@@ -874,7 +877,9 @@ def testBenchmark405B_fp8_TP8_Decomposed(self):
             cwd=self.repo_root,
         )
 
-    @pytest.mark.xfail(reason="Compile Error", strict=True, raises=IreeCompileException)
+    @pytest.mark.xfail(
+        reason="KeyError in theta.py", strict=True, raises=ExportMlirException
+    )
     def testBenchmark405B_fp8_TP8_Non_Decomposed(self):
         output_file_name = self.dir_path_405b / "fp8_torch"
         output_mlir = self.llama405b_fp8_torch_sdpa_artifacts.create_file(