Skip to content

Commit 4562e5f

Browse files
ggerganov authored and jordankanter committed
perplexity : fix kv cache handling for hellaswag (ggml-org#4981)
ggml-ci
1 parent e512b62 commit 4562e5f

File tree

1 file changed

+1
-0
lines changed

1 file changed

+1
-0
lines changed

examples/perplexity/perplexity.cpp

+1
Original file line number | Diff line number | Diff line change
@@ -428,6 +428,7 @@ static std::vector<float> hellaswag_evaluate_tokens(
428 428
for (size_t i_chunk = 0; i_chunk < n_chunk; ++i_chunk) {
429 429
size_t n_tokens = tokens.size() - i_chunk * n_batch;
430 430
n_tokens = std::min(n_tokens, size_t(n_batch));
431+
llama_kv_cache_seq_rm(ctx, 0, n_past, -1);
431 432
if (llama_decode(ctx, llama_batch_get_one(tokens.data() + i_chunk * n_batch, n_tokens, n_past, 0))) {
432 433
fprintf(stderr, "%s : failed to eval\n", __func__);
433 434
return {};

0 commit comments

Comments (0)