From: Johannes Gäßler
Date: Fri, 28 Feb 2025 08:26:43 +0000 (+0100)
Subject: CUDA: fix logic for V100 + GGML_CUDA_FORCE_MMQ (#12098)
X-Git-Tag: upstream/0.0.4853~64
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=9c42b1718ca8299f9afeabdc122badeab64c9690;p=pkg%2Fggml%2Fsources%2Fllama.cpp

CUDA: fix logic for V100 + GGML_CUDA_FORCE_MMQ (#12098)
---

diff --git a/ggml/src/ggml-cuda/mmq.cuh b/ggml/src/ggml-cuda/mmq.cuh
index 0451c65f..f2aca1f2 100644
--- a/ggml/src/ggml-cuda/mmq.cuh
+++ b/ggml/src/ggml-cuda/mmq.cuh
@@ -109,9 +109,9 @@ static constexpr __device__ int get_mmq_x_max_device() {
 #if __CUDA_ARCH__ >= GGML_CUDA_CC_VOLTA
 #ifdef GGML_CUDA_FORCE_MMQ
-    return MMQ_DP4A_MAX_BATCH_SIZE;
-#else // GGML_CUDA_FORCE_MMQ
     return 128;
+#else // GGML_CUDA_FORCE_MMQ
+    return MMQ_DP4A_MAX_BATCH_SIZE;
 #endif // GGML_CUDA_FORCE_MMQ
 #else // __CUDA_ARCH__ >= GGML_CUDA_CC_VOLTA