kv-cache : ggml_rope_ext_inplace -> ggml_rope_ext

ggerganov · ggerganov · commit 2bc75455e25e · 2025-05-02T13:26:49.000+03:00
ggml-ci
diff --git a/src/llama-kv-cache.cpp b/src/llama-kv-cache.cpp
@@ -613,14 +613,14 @@ ggml_tensor * llama_kv_cache_unified::build_rope_shift(
         // dequantize to f32 -> RoPE -> quantize back
         tmp = ggml_cast(ctx, cur, GGML_TYPE_F32);
 
-        tmp = ggml_rope_ext_inplace(ctx, tmp,
+        tmp = ggml_rope_ext(ctx, tmp,
                 shift, factors, n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
                 yarn_ext_factor, yarn_attn_factor, yarn_beta_fast, yarn_beta_slow);
 
         tmp = ggml_cpy(ctx, tmp, cur);
     } else {
         // we rotate only the first n_rot dimensions
-        tmp = ggml_rope_ext_inplace(ctx, cur,
+        tmp = ggml_rope_ext(ctx, cur,
                 shift, factors, n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
                 yarn_ext_factor, yarn_attn_factor, yarn_beta_fast, yarn_beta_slow);
     }