Skip to content

Commit 9c8d809

Browse files
committed
sycl : Fixed mmvq being called when reorder is disabled
1 parent 187451b commit 9c8d809

File tree

1 file changed

+4
-1
lines changed

1 file changed

+4
-1
lines changed

ggml/src/ggml-sycl/ggml-sycl.cpp

+4 -1
Original file line number | Diff line number | Diff line change
@@ -2957,12 +2957,15 @@ static void ggml_sycl_mul_mat(ggml_backend_sycl_context & ctx, const ggml_tensor
2957 2957
use_mul_mat_q = use_mul_mat_q && (src1->ne[1] <= MMQ_MAX_BATCH_SIZE);
2958 2958
#endif // SYCL_USE_XMX
2959 2959

2960+
const bool reorder = static_cast<ggml_tensor_extra_gpu *>(dst->src[0]->extra) &&
2961+
static_cast<ggml_tensor_extra_gpu *>(dst->src[0]->extra)->optimized_feature.reorder;
2962+
2960 2963
// mmvq path is faster in the CUDA backend.
2961 2964
if (ctx.stream()->get_backend() == sycl::backend::ext_oneapi_cuda
2962 2965
// Dispatch becomes obscure with the reorder, MMVQ when the reorder optimization
2963 2966
// is enabled takes precedence over DMMV, the current if-else implementation
2964 2967
// requires disabling DMMV if both conditions are met
2965-
|| (ctx.opt_feature.reorder && ggml_sycl_supports_reorder_mmvq(src0->type))) {
2968+
|| (reorder && ggml_sycl_supports_reorder_mmvq(src0->type))) {
2966 2969
use_dequantize_mul_mat_vec = use_dequantize_mul_mat_vec && !use_mul_mat_vec_q;
2967 2970
}
2968 2971

0 commit comments

Comments
 (0)