From: Georgi Gerganov <redacted>
Date: Sun, 25 Jan 2026 13:48:56 +0000 (+0200)
Subject: kv-cache : support V-less cache (llama/19067)
X-Git-Tag: v0.9.6~34
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=fce2ea41b8c2b9bfd59a6f228c1fb9ca4c56f71e;p=pkg%2Fggml%2Fsources%2Fggml

kv-cache : support V-less cache (llama/19067)

* kv-cache : support V-less cache

* cuda : better check for V_is_K_view

* cuda : improve V_is_K_view check

* graph : add comments

* hparams : refactor
---

diff --git a/src/ggml-cuda/fattn-common.cuh b/src/ggml-cuda/fattn-common.cuh
index 40c77257..13c5b0a4 100644
--- a/src/ggml-cuda/fattn-common.cuh
+++ b/src/ggml-cuda/fattn-common.cuh
@@ -782,7 +782,7 @@ void launch_fattn(
     const ggml_tensor * K = dst->src[1];
     const ggml_tensor * V = dst->src[2];
 
-    const bool V_is_K_view = V->op == GGML_OP_VIEW && V->src[0] == K && V->data == K->data;
+    const bool V_is_K_view = V->view_src && V->view_offs == 0 && (V->view_src == K || V->view_src == K->view_src);
 
     const ggml_tensor * mask  = dst->src[3];
     const ggml_tensor * sinks = dst->src[4];
diff --git a/src/ggml-cuda/fattn.cu b/src/ggml-cuda/fattn.cu
index ba2b96bc..a5e66241 100644
--- a/src/ggml-cuda/fattn.cu
+++ b/src/ggml-cuda/fattn.cu
@@ -247,7 +247,7 @@ static best_fattn_kernel ggml_cuda_get_best_fattn_kernel(const int device, const
         }
     }
 
-    const bool V_is_K_view = V->op == GGML_OP_VIEW && V->src[0] == K && V->data == K->data;
+    const bool V_is_K_view = V->view_src && V->view_offs == 0 && (V->view_src == K || V->view_src == K->view_src);
 
     const int cc = ggml_cuda_info().devices[device].cc;