Skip to content

Commit 9c42b17

Browse files
CUDA: fix logic for V100 + GGML_CUDA_FORCE_MMQ (#12098)
1 parent 05e6f5a commit 9c42b17

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

ggml/src/ggml-cuda/mmq.cuh

+2 -2
Original file line number | Diff line number | Diff line change
@@ -109,9 +109,9 @@ static constexpr __device__ int get_mmq_x_max_device() {
109109

110110
#if __CUDA_ARCH__ >= GGML_CUDA_CC_VOLTA
111111
#ifdef GGML_CUDA_FORCE_MMQ
112-
return MMQ_DP4A_MAX_BATCH_SIZE;
113-
#else // GGML_CUDA_FORCE_MMQ
114112
return 128;
113+
#else // GGML_CUDA_FORCE_MMQ
114+
return MMQ_DP4A_MAX_BATCH_SIZE;
115115
#endif // GGML_CUDA_FORCE_MMQ
116116
#else // __CUDA_ARCH__ >= GGML_CUDA_CC_VOLTA
117117

0 commit comments

Comments
 (0)