From: Johannes Gäßler
Date: Fri, 28 Feb 2025 08:26:43 +0000 (+0100)
Subject: CUDA: fix logic for V100 + GGML_CUDA_FORCE_MMQ (llama/12098)
X-Git-Tag: upstream/0.0.1802~29
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=5e0d4516f8bd548c509fb670fdb177fc1469e702;p=pkg%2Fggml%2Fsources%2Fggml

CUDA: fix logic for V100 + GGML_CUDA_FORCE_MMQ (llama/12098)
---

diff --git a/src/ggml-cuda/mmq.cuh b/src/ggml-cuda/mmq.cuh
index 0451c65f..f2aca1f2 100644
--- a/src/ggml-cuda/mmq.cuh
+++ b/src/ggml-cuda/mmq.cuh
@@ -109,9 +109,9 @@ static constexpr __device__ int get_mmq_x_max_device() {
 
 #if __CUDA_ARCH__ >= GGML_CUDA_CC_VOLTA
 #ifdef GGML_CUDA_FORCE_MMQ
-    return MMQ_DP4A_MAX_BATCH_SIZE;
-#else // GGML_CUDA_FORCE_MMQ
     return 128;
+#else // GGML_CUDA_FORCE_MMQ
+    return MMQ_DP4A_MAX_BATCH_SIZE;
 #endif // GGML_CUDA_FORCE_MMQ
 #else // __CUDA_ARCH__ >= GGML_CUDA_CC_VOLTA
 