Rename `cache_type` to `prefix_sharing_algorithm` (value "base" becomes "none"; "trie" is unchanged)

renxida · renxida · commit 926d48381915 · 2024-12-02T13:59:32.000-08:00
diff --git a/app_tests/integration_tests/llm/shortfin/conftest.py b/app_tests/integration_tests/llm/shortfin/conftest.py
@@ -83,7 +83,11 @@ def model_test_dir(request, tmp_path_factory):
             "prefill_batch_sizes": batch_sizes,
             "decode_batch_sizes": batch_sizes,
             "transformer_block_count": 26,
-            "paged_kv_cache": {"block_seq_stride": 16, "device_block_count": 256},
+            "paged_kv_cache": {
+                "block_seq_stride": 16,
+                "device_block_count": 256,
+                "prefix_sharing_algorithm": "none",
+            },
         }
         logger.info(f"Saving edited config to: {edited_config_path}\n")
         logger.info(f"Config: {json.dumps(config, indent=2)}")
diff --git a/shortfin/python/shortfin_apps/llm/components/config_struct.py b/shortfin/python/shortfin_apps/llm/components/config_struct.py
@@ -86,7 +86,7 @@ class PagedKVCacheParams:
     # Size of the cache on each device.
     device_block_count: int
 
-    cache_type: str = "base"  # currently supporting base and trie
+    prefix_sharing_algorithm: str = "none"  # currently supporting none and trie
 
 
 @dataclass_json(undefined=Undefined.RAISE)
diff --git a/shortfin/python/shortfin_apps/llm/components/service.py b/shortfin/python/shortfin_apps/llm/components/service.py
@@ -68,19 +68,19 @@ def __init__(
         page_pool = PagePool(
             devices=self.main_fiber.devices_dict.values(), config=page_pool_config
         )
-        if model_params.paged_kv_cache.cache_type == "trie":
+        if model_params.paged_kv_cache.prefix_sharing_algorithm == "trie":
             self.page_cache = TriePagedAttentionCache(
                 page_pool=page_pool,
                 tokens_per_page=model_params.paged_kv_cache.block_seq_stride,
             )
-        elif model_params.paged_kv_cache.cache_type == "base":
+        elif model_params.paged_kv_cache.prefix_sharing_algorithm == "none":
             self.page_cache = BasePagedAttentionCache(
                 page_pool=page_pool,
                 tokens_per_page=model_params.paged_kv_cache.block_seq_stride,
             )
         else:
             raise ValueError(
-                f"Unknown model_params.paged_kv_cache.cache_type {model_params.paged_kv_cache.cache_type}. Currently only supporting 'trie' and 'base'."
+                f"Unknown model_params.paged_kv_cache.prefix_sharing_algorithm {model_params.paged_kv_cache.prefix_sharing_algorithm}. Currently only supporting 'trie' and 'none'."
             )
 
         self.program_isolation = PROG_ISOLATIONS[program_isolation]

Original file line number	Diff line number	Diff line change
`@@ -68,19 +68,19 @@ def __init__(`
`68`	`68`	`page_pool = PagePool(`
`69`	`69`	`devices=self.main_fiber.devices_dict.values(), config=page_pool_config`
`70`	`70`	`)`
`71`		`- if model_params.paged_kv_cache.cache_type == "trie":`
	`71`	`+ if model_params.paged_kv_cache.prefix_sharing_algorithm == "trie":`
`72`	`72`	`self.page_cache = TriePagedAttentionCache(`
`73`	`73`	`page_pool=page_pool,`
`74`	`74`	`tokens_per_page=model_params.paged_kv_cache.block_seq_stride,`
`75`	`75`	`)`
`76`		`- elif model_params.paged_kv_cache.cache_type == "base":`
	`76`	`+ elif model_params.paged_kv_cache.prefix_sharing_algorithm == "none":`
`77`	`77`	`self.page_cache = BasePagedAttentionCache(`
`78`	`78`	`page_pool=page_pool,`
`79`	`79`	`tokens_per_page=model_params.paged_kv_cache.block_seq_stride,`
`80`	`80`	`)`
`81`	`81`	`else:`
`82`	`82`	`raise ValueError(`
`83`		`- f"Unknown model_params.paged_kv_cache.cache_type {model_params.paged_kv_cache.cache_type}. Currently only supporting 'trie' and 'base'."`
	`83`	`+ f"Unknown model_params.paged_kv_cache.prefix_sharing_algorithm {model_params.paged_kv_cache.prefix_sharing_algorithm}. Currently only supporting 'trie' and 'none'."`
`84`	`84`	`)`
`85`	`85`
`86`	`86`	`self.program_isolation = PROG_ISOLATIONS[program_isolation]`