From 1c08e34fbacf89f64d524ebca197385b54c68c81 Mon Sep 17 00:00:00 2001
From: Cedar <cedar.ren@gmail.com>
Date: Mon, 25 Nov 2024 16:07:57 -0800
Subject: [PATCH] Fix pages_needed math in acquire_pages_for_tokens

---
 .../llm/components/kvcache/base_attention_cache.py             | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/shortfin/python/shortfin_apps/llm/components/kvcache/base_attention_cache.py b/shortfin/python/shortfin_apps/llm/components/kvcache/base_attention_cache.py
index 7b9f38145a8..0007000bcae 100644
--- a/shortfin/python/shortfin_apps/llm/components/kvcache/base_attention_cache.py
+++ b/shortfin/python/shortfin_apps/llm/components/kvcache/base_attention_cache.py
@@ -53,7 +53,8 @@ def acquire_pages_for_tokens(
 
         No token at idx < n_cached_token should be written to. TODO: consider enforcing this.
         """
-        pages_needed = math.ceil(len(tokens) + extra_token_slots / self.tokens_per_page)
+        token_count = len(tokens)
+        pages_needed = math.ceil(token_count / self.tokens_per_page)
         pages = self.page_pool.acquire_free_pages(pages_needed)
 
         n_cached_tokens = 0