git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
kv-cache : fix k-shift for multiple streams (#14742)
author Georgi Gerganov <redacted>
Thu, 17 Jul 2025 17:52:33 +0000 (20:52 +0300)
committer GitHub <redacted>
Thu, 17 Jul 2025 17:52:33 +0000 (20:52 +0300)
ggml-ci

src/llama-kv-cache-unified.cpp

index 98c01ea7ad15d42f0a501d7c5683a72a8a6a4255..321dc79fc36ab708a4ac96076b3fabf200568a3e 100644 (file)
@@ -1261,7 +1261,7 @@ void llama_kv_cache_unified::set_input_k_shift(ggml_tensor * dst) const {
         const auto & cells = v_cells[s];
 
         for (uint32_t i = 0; i < cells.size(); ++i) {
-            data[i] = cells.is_empty(i) ? 0 : cells.get_shift(i);
+            data[s*cells.size() + i] = cells.is_empty(i) ? 0 : cells.get_shift(i);
         }
     }
 }