Skip to content

Commit 67bcc86

Browse files
committed
add another test case for trie and xfail it
1 parent 926d483 commit 67bcc86

File tree

2 files changed: 17 additions and 1 deletion.

app_tests/integration_tests/llm/shortfin/conftest.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -51,6 +51,7 @@ def model_test_dir(request, tmp_path_factory):
5151
tokenizer_id = request.param["tokenizer_id"]
5252
settings = request.param["settings"]
5353
batch_sizes = request.param["batch_sizes"]
54+
prefix_sharing_algorithm = request.param["prefix_sharing_algorithm"]
5455

5556
tmp_dir = tmp_path_factory.mktemp("cpu_llm_server_test")
5657
hf_home = os.environ.get("HF_HOME", None)

app_tests/integration_tests/llm/shortfin/cpu_llm_server_test.py

Lines changed: 16 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -72,9 +72,24 @@ def do_generate(prompt, port):
7272
"tokenizer_id": "openlm-research/open_llama_3b_v2",
7373
"settings": CPU_SETTINGS,
7474
"batch_sizes": [1, 4],
75+
"prefix_sharing_algorithm": "none",
7576
},
7677
{"model_file": "open-llama-3b-v2-f16.gguf", "settings": CPU_SETTINGS},
77-
)
78+
),
79+
pytest.param(
80+
{
81+
"repo_id": "SlyEcho/open_llama_3b_v2_gguf",
82+
"model_file": "open-llama-3b-v2-f16.gguf",
83+
"tokenizer_id": "openlm-research/open_llama_3b_v2",
84+
"settings": CPU_SETTINGS,
85+
"batch_sizes": [1, 4],
86+
"prefix_sharing_algorithm": "trie",
87+
},
88+
{"model_file": "open-llama-3b-v2-f16.gguf", "settings": CPU_SETTINGS},
89+
marks=pytest.mark.xfail(
90+
reason="Trie-based prefix sharing not yet supported"
91+
),
92+
),
7893
],
7994
indirect=True,
8095
)

0 commit comments

Comments
 (0)