From: Georgi Gerganov Date: Sun, 25 Jan 2026 13:48:56 +0000 (+0200) Subject: kv-cache : support V-less cache (llama/19067) X-Git-Tag: v0.9.6~34 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=fce2ea41b8c2b9bfd59a6f228c1fb9ca4c56f71e;p=pkg%2Fggml%2Fsources%2Fggml kv-cache : support V-less cache (llama/19067) * kv-cache : support V-less cache * cuda : better check for V_is_K_view * cuda : improve V_is_K_view check * graph : add comments * hparams : refactor --- diff --git a/src/ggml-cuda/fattn-common.cuh b/src/ggml-cuda/fattn-common.cuh index 40c77257..13c5b0a4 100644 --- a/src/ggml-cuda/fattn-common.cuh +++ b/src/ggml-cuda/fattn-common.cuh @@ -782,7 +782,7 @@ void launch_fattn( const ggml_tensor * K = dst->src[1]; const ggml_tensor * V = dst->src[2]; - const bool V_is_K_view = V->op == GGML_OP_VIEW && V->src[0] == K && V->data == K->data; + const bool V_is_K_view = V->view_src && V->view_offs == 0 && (V->view_src == K || V->view_src == K->view_src); const ggml_tensor * mask = dst->src[3]; const ggml_tensor * sinks = dst->src[4]; diff --git a/src/ggml-cuda/fattn.cu b/src/ggml-cuda/fattn.cu index ba2b96bc..a5e66241 100644 --- a/src/ggml-cuda/fattn.cu +++ b/src/ggml-cuda/fattn.cu @@ -247,7 +247,7 @@ static best_fattn_kernel ggml_cuda_get_best_fattn_kernel(const int device, const } } - const bool V_is_K_view = V->op == GGML_OP_VIEW && V->src[0] == K && V->data == K->data; + const bool V_is_K_view = V->view_src && V->view_offs == 0 && (V->view_src == K || V->view_src == K->view_src); const int cc = ggml_cuda_info().devices[device].cc;