From: Johannes Gäßler Date: Fri, 7 Nov 2025 19:53:14 +0000 (+0100) Subject: CUDA: fix should_use_mmvf for ne11 == 1 (#17085) X-Git-Tag: upstream/0.0.7011~33 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=65156105069fa86a4a81b6cb0e8cb583f6420677;p=pkg%2Fggml%2Fsources%2Fllama.cpp CUDA: fix should_use_mmvf for ne11 == 1 (#17085) * CUDA: fix should_use_mmvf for ne11 == 1 * Apply suggestion from @am17an Co-authored-by: Aman Gupta --------- Co-authored-by: Aman Gupta --- diff --git a/ggml/src/ggml-cuda/mmf.cu b/ggml/src/ggml-cuda/mmf.cu index 69a60ace..153dd5a9 100644 --- a/ggml/src/ggml-cuda/mmf.cu +++ b/ggml/src/ggml-cuda/mmf.cu @@ -129,7 +129,13 @@ bool ggml_cuda_should_use_mmf(enum ggml_type type, int cc, int warp_size, const if (src0_ne[0] % (warp_size * (4/ts)) != 0) { return false; } - for (size_t i = 0; i < GGML_MAX_DIMS; ++i) { + + if (src0_nb[0] != ts) { + return false; + } + + // Pointers not aligned to the size of half2/nv_bfloat162/float2 would result in a crash: + for (size_t i = 1; i < GGML_MAX_DIMS; ++i) { if (src0_nb[i] % (2*ts) != 0) { return false; } diff --git a/ggml/src/ggml-cuda/mmvf.cu b/ggml/src/ggml-cuda/mmvf.cu index 526d90d7..6238ce7e 100644 --- a/ggml/src/ggml-cuda/mmvf.cu +++ b/ggml/src/ggml-cuda/mmvf.cu @@ -720,12 +720,19 @@ bool ggml_cuda_should_use_mmvf(enum ggml_type type, int cc, const int64_t * src0 if (src0_ne[0] % 2 != 0) { return false; } + const size_t ts = ggml_type_size(type); - for (size_t i = 0; i < GGML_MAX_DIMS; ++i) { + if (src0_nb[0] != ts) { + return false; + } + + // Pointers not aligned to the size of half2/nv_bfloat162/float2 would result in a crash: + for (size_t i = 1; i < GGML_MAX_DIMS; ++i) { if (src0_nb[i] % (2*ts) != 0) { return false; } } + switch (type) { case GGML_TYPE_F32: if (GGML_CUDA_CC_IS_NVIDIA(cc)) {