From 1c08e34fbacf89f64d524ebca197385b54c68c81 Mon Sep 17 00:00:00 2001 From: Cedar Date: Mon, 25 Nov 2024 16:07:57 -0800 Subject: [PATCH] missed problem with math --- .../llm/components/kvcache/base_attention_cache.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/shortfin/python/shortfin_apps/llm/components/kvcache/base_attention_cache.py b/shortfin/python/shortfin_apps/llm/components/kvcache/base_attention_cache.py index 7b9f38145..0007000bc 100644 --- a/shortfin/python/shortfin_apps/llm/components/kvcache/base_attention_cache.py +++ b/shortfin/python/shortfin_apps/llm/components/kvcache/base_attention_cache.py @@ -53,7 +53,8 @@ def acquire_pages_for_tokens( No token at idx < n_cached_token should be written to. TODO: consider enforcing this. """ - pages_needed = math.ceil(len(tokens) + extra_token_slots / self.tokens_per_page) + token_count = len(tokens) + pages_needed = math.ceil(token_count / self.tokens_per_page) pages = self.page_pool.acquire_free_pages(pages_needed) n_cached_tokens = 0