cuda : fix "V is K view" check for non-unified KV cache (llama/19145)

author Georgi Gerganov <redacted>

Wed, 28 Jan 2026 07:15:27 +0000 (09:15 +0200)

committer Georgi Gerganov <redacted>

Fri, 30 Jan 2026 13:56:40 +0000 (15:56 +0200)
author Georgi Gerganov <redacted>
Wed, 28 Jan 2026 07:15:27 +0000 (09:15 +0200)
committer Georgi Gerganov <redacted>
Fri, 30 Jan 2026 13:56:40 +0000 (15:56 +0200)
diff --git a/ggml/src/ggml-cuda/fattn-common.cuh b/ggml/src/ggml-cuda/fattn-common.cuh

index 3d7daccfdf873b2ef3b4aac14bb742d0460f379a..b6a7460da831dc8e28e9ff5a0c175ef12501de5f 100644 (file)
--- a/ggml/src/ggml-cuda/fattn-common.cuh
+++ b/ggml/src/ggml-cuda/fattn-common.cuh
@@ -789,7 +789,7 @@ void launch_fattn(
      const ggml_tensor * K = dst->src[1];
      const ggml_tensor * V = dst->src[2];
  
-    const bool V_is_K_view = V->view_src && V->view_offs == 0 && (V->view_src == K || V->view_src == K->view_src);
+    const bool V_is_K_view = V->view_src && (V->view_src == K || (V->view_src == K->view_src && V->view_offs == K->view_offs));
  
      const ggml_tensor * mask  = dst->src[3];
      const ggml_tensor * sinks = dst->src[4];
diff --git a/ggml/src/ggml-cuda/fattn.cu b/ggml/src/ggml-cuda/fattn.cu

index fe18ff6c7dcf2d61340449df29cf35a1445417db..195904ee2061eb12566f7f2aa64ea7c5b03fc18d 100644 (file)
--- a/ggml/src/ggml-cuda/fattn.cu
+++ b/ggml/src/ggml-cuda/fattn.cu
@@ -310,7 +310,7 @@ static best_fattn_kernel ggml_cuda_get_best_fattn_kernel(const int device, const
          }
      }
  
-    const bool V_is_K_view = V->view_src && V->view_offs == 0 && (V->view_src == K || V->view_src == K->view_src);
+    const bool V_is_K_view = V->view_src && (V->view_src == K || (V->view_src == K->view_src && V->view_offs == K->view_offs));
  
      const int cc = ggml_cuda_info().devices[device].cc;
author	Georgi Gerganov <redacted>
	Wed, 28 Jan 2026 07:15:27 +0000 (09:15 +0200)
committer	Georgi Gerganov <redacted>
	Fri, 30 Jan 2026 13:56:40 +0000 (15:56 +0200)
ggml/src/ggml-cuda/fattn-common.cuh		patch | blob | history
ggml/src/ggml-cuda/fattn.cu		patch | blob | history