File tree Expand file tree Collapse file tree 3 files changed +9
-5
lines changed
app_tests/integration_tests/llm/shortfin
shortfin/python/shortfin_apps/llm/components Expand file tree Collapse file tree 3 files changed +9
-5
lines changed Original file line number Diff line number Diff line change @@ -83,7 +83,11 @@ def model_test_dir(request, tmp_path_factory):
83
83
"prefill_batch_sizes" : batch_sizes ,
84
84
"decode_batch_sizes" : batch_sizes ,
85
85
"transformer_block_count" : 26 ,
86
- "paged_kv_cache" : {"block_seq_stride" : 16 , "device_block_count" : 256 },
86
+ "paged_kv_cache" : {
87
+ "block_seq_stride" : 16 ,
88
+ "device_block_count" : 256 ,
89
+ "prefix_sharing_algorithm" : "none" ,
90
+ },
87
91
}
88
92
logger .info (f"Saving edited config to: { edited_config_path } \n " )
89
93
logger .info (f"Config: { json .dumps (config , indent = 2 )} " )
Original file line number Diff line number Diff line change @@ -86,7 +86,7 @@ class PagedKVCacheParams:
86
86
# Size of the cache on each device.
87
87
device_block_count : int
88
88
89
- cache_type : str = "base" # currently supporting base and trie
89
+ prefix_sharing_algorithm : str = "none" # currently supporting none and trie
90
90
91
91
92
92
@dataclass_json (undefined = Undefined .RAISE )
Original file line number Diff line number Diff line change @@ -68,19 +68,19 @@ def __init__(
68
68
page_pool = PagePool (
69
69
devices = self .main_fiber .devices_dict .values (), config = page_pool_config
70
70
)
71
- if model_params .paged_kv_cache .cache_type == "trie" :
71
+ if model_params .paged_kv_cache .prefix_sharing_algorithm == "trie" :
72
72
self .page_cache = TriePagedAttentionCache (
73
73
page_pool = page_pool ,
74
74
tokens_per_page = model_params .paged_kv_cache .block_seq_stride ,
75
75
)
76
- elif model_params .paged_kv_cache .cache_type == "base" :
76
+ elif model_params .paged_kv_cache .prefix_sharing_algorithm == "none" :
77
77
self .page_cache = BasePagedAttentionCache (
78
78
page_pool = page_pool ,
79
79
tokens_per_page = model_params .paged_kv_cache .block_seq_stride ,
80
80
)
81
81
else :
82
82
raise ValueError (
83
- f"Unknown model_params.paged_kv_cache.cache_type { model_params .paged_kv_cache .cache_type } . Currently only supporting 'trie' and 'base'."
83
+ f"Unknown model_params.paged_kv_cache.prefix_sharing_algorithm { model_params .paged_kv_cache .prefix_sharing_algorithm } . Currently only supporting 'trie' and 'none'."
84
84
)
85
85
86
86
self .program_isolation = PROG_ISOLATIONS [program_isolation ]
You can’t perform that action at this time.
0 commit comments