diff --git a/shortfin/python/shortfin_apps/llm/components/kvcache/base_attention_cache.py b/shortfin/python/shortfin_apps/llm/components/kvcache/base_attention_cache.py index 7b9f38145..0007000bc 100644 --- a/shortfin/python/shortfin_apps/llm/components/kvcache/base_attention_cache.py +++ b/shortfin/python/shortfin_apps/llm/components/kvcache/base_attention_cache.py @@ -53,7 +53,8 @@ def acquire_pages_for_tokens( No token at idx < n_cached_token should be written to. TODO: consider enforcing this. """ - pages_needed = math.ceil(len(tokens) + extra_token_slots / self.tokens_per_page) + token_count = len(tokens) + pages_needed = math.ceil(token_count / self.tokens_per_page) pages = self.page_pool.acquire_free_pages(pages_needed) n_cached_tokens = 0