Add a test case for trie-based prefix sharing and mark it xfail

renxida · renxida · commit 67bcc86d9459 · 2024-12-02T14:02:24.000-08:00
diff --git a/app_tests/integration_tests/llm/shortfin/conftest.py b/app_tests/integration_tests/llm/shortfin/conftest.py
@@ -51,6 +51,7 @@ def model_test_dir(request, tmp_path_factory):
     tokenizer_id = request.param["tokenizer_id"]
     settings = request.param["settings"]
     batch_sizes = request.param["batch_sizes"]
+    prefix_sharing_algorithm = request.param["prefix_sharing_algorithm"]
 
     tmp_dir = tmp_path_factory.mktemp("cpu_llm_server_test")
     hf_home = os.environ.get("HF_HOME", None)
diff --git a/app_tests/integration_tests/llm/shortfin/cpu_llm_server_test.py b/app_tests/integration_tests/llm/shortfin/cpu_llm_server_test.py
@@ -72,9 +72,24 @@ def do_generate(prompt, port):
                 "tokenizer_id": "openlm-research/open_llama_3b_v2",
                 "settings": CPU_SETTINGS,
                 "batch_sizes": [1, 4],
+                "prefix_sharing_algorithm": "none",
             },
             {"model_file": "open-llama-3b-v2-f16.gguf", "settings": CPU_SETTINGS},
-        )
+        ),
+        pytest.param(
+            {
+                "repo_id": "SlyEcho/open_llama_3b_v2_gguf",
+                "model_file": "open-llama-3b-v2-f16.gguf",
+                "tokenizer_id": "openlm-research/open_llama_3b_v2",
+                "settings": CPU_SETTINGS,
+                "batch_sizes": [1, 4],
+                "prefix_sharing_algorithm": "trie",
+            },
+            {"model_file": "open-llama-3b-v2-f16.gguf", "settings": CPU_SETTINGS},
+            marks=pytest.mark.xfail(
+                reason="Trie-based prefix sharing not yet supported"
+            ),
+        ),
     ],
     indirect=True,
 )