From: Georgi Gerganov Date: Wed, 28 Jan 2026 07:15:27 +0000 (+0200) Subject: cuda : fix "V is K view" check for non-unified KV cache (llama/19145) X-Git-Tag: upstream/1.8.3+155~101 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=7fb0f823de760f1be8e6d951516d3adc95b8bb61;p=pkg%2Fggml%2Fsources%2Fwhisper.cpp cuda : fix "V is K view" check for non-unified KV cache (llama/19145) --- diff --git a/ggml/src/ggml-cuda/fattn-common.cuh b/ggml/src/ggml-cuda/fattn-common.cuh index 3d7daccf..b6a7460d 100644 --- a/ggml/src/ggml-cuda/fattn-common.cuh +++ b/ggml/src/ggml-cuda/fattn-common.cuh @@ -789,7 +789,7 @@ void launch_fattn( const ggml_tensor * K = dst->src[1]; const ggml_tensor * V = dst->src[2]; - const bool V_is_K_view = V->view_src && V->view_offs == 0 && (V->view_src == K || V->view_src == K->view_src); + const bool V_is_K_view = V->view_src && (V->view_src == K || (V->view_src == K->view_src && V->view_offs == K->view_offs)); const ggml_tensor * mask = dst->src[3]; const ggml_tensor * sinks = dst->src[4]; diff --git a/ggml/src/ggml-cuda/fattn.cu b/ggml/src/ggml-cuda/fattn.cu index fe18ff6c..195904ee 100644 --- a/ggml/src/ggml-cuda/fattn.cu +++ b/ggml/src/ggml-cuda/fattn.cu @@ -310,7 +310,7 @@ static best_fattn_kernel ggml_cuda_get_best_fattn_kernel(const int device, const } } - const bool V_is_K_view = V->view_src && V->view_offs == 0 && (V->view_src == K || V->view_src == K->view_src); + const bool V_is_K_view = V->view_src && (V->view_src == K || (V->view_src == K->view_src && V->view_offs == K->view_offs)); const int cc = ggml_cuda_info().devices[device].cc;