diff --git a/shortfin/python/shortfin_apps/llm/components/service.py b/shortfin/python/shortfin_apps/llm/components/service.py index 9ef8b5c4d..c8d6c51a8 100644 --- a/shortfin/python/shortfin_apps/llm/components/service.py +++ b/shortfin/python/shortfin_apps/llm/components/service.py @@ -383,7 +383,7 @@ async def run(self): if self.phase == InferencePhase.PREFILL: seq_lens_host = seq_lens.for_transfer() with seq_lens_host.map(discard=True) as m: - m.fill(0) + m.fill(1) m.items = [len(req.input_token_ids) for req in self.exec_requests] seq_lens_host.copy_to(seq_lens)