CUDA: fix should_use_mmvf for ne11 == 1 (llama/17085)

author Johannes Gäßler <redacted>

Fri, 7 Nov 2025 19:53:14 +0000 (20:53 +0100)

committer Georgi Gerganov <redacted>

Sun, 9 Nov 2025 16:30:22 +0000 (18:30 +0200)
author Johannes Gäßler <redacted>
Fri, 7 Nov 2025 19:53:14 +0000 (20:53 +0100)
committer Georgi Gerganov <redacted>
Sun, 9 Nov 2025 16:30:22 +0000 (18:30 +0200)
diff --git a/src/ggml-cuda/mmf.cu b/src/ggml-cuda/mmf.cu

index 69a60aceb82b74484a42f78e314368b1d6646f8e..153dd5a97d5a7d68b4217758c0a4b6d7d9cb4995 100644 (file)
--- a/src/ggml-cuda/mmf.cu
+++ b/src/ggml-cuda/mmf.cu
@@ -129,7 +129,13 @@ bool ggml_cuda_should_use_mmf(enum ggml_type type, int cc, int warp_size, const
      if (src0_ne[0] % (warp_size * (4/ts)) != 0) {
          return false;
      }
-    for (size_t i = 0; i < GGML_MAX_DIMS; ++i) {
+
+    if (src0_nb[0] != ts) {
+        return false;
+    }
+
+    // Pointers not aligned to the size of half2/nv_bfloat162/float2 would result in a crash:
+    for (size_t i = 1; i < GGML_MAX_DIMS; ++i) {
          if (src0_nb[i] % (2*ts) != 0) {
              return false;
          }
diff --git a/src/ggml-cuda/mmvf.cu b/src/ggml-cuda/mmvf.cu

index 526d90d7aee52618b84b75b7112edb8f9c1cb680..6238ce7ebd7ba0dc4a18ef3def6557efda67e993 100644 (file)
--- a/src/ggml-cuda/mmvf.cu
+++ b/src/ggml-cuda/mmvf.cu
@@ -720,12 +720,19 @@ bool ggml_cuda_should_use_mmvf(enum ggml_type type, int cc, const int64_t * src0
      if (src0_ne[0] % 2 != 0) {
          return false;
      }
+
      const size_t ts = ggml_type_size(type);
-    for (size_t i = 0; i < GGML_MAX_DIMS; ++i) {
+    if (src0_nb[0] != ts) {
+        return false;
+    }
+
+    // Pointers not aligned to the size of half2/nv_bfloat162/float2 would result in a crash:
+    for (size_t i = 1; i < GGML_MAX_DIMS; ++i) {
          if (src0_nb[i] % (2*ts) != 0) {
              return false;
          }
      }
+
      switch (type) {
          case GGML_TYPE_F32:
              if (GGML_CUDA_CC_IS_NVIDIA(cc)) {
author	Johannes Gäßler <redacted>
	Fri, 7 Nov 2025 19:53:14 +0000 (20:53 +0100)
committer	Georgi Gerganov <redacted>
	Sun, 9 Nov 2025 16:30:22 +0000 (18:30 +0200)
src/ggml-cuda/mmf.cu		patch | blob | history
src/ggml-cuda/mmvf.cu		patch | blob | history