kv-cache : fix unified::seq_rm to work with seq_id < 0 (#13985)
author    Georgi Gerganov <redacted>
          Wed, 4 Jun 2025 06:50:32 +0000 (09:50 +0300)
committer GitHub <redacted>
          Wed, 4 Jun 2025 06:50:32 +0000 (09:50 +0300)
ggml-ci

src/llama-kv-cache-unified.cpp

index a817154769a32306740c826ece3d6d4f60aee5d3..4007f202e313b9cb9186533532ae3c1f92481081 100644
@@ -149,12 +149,27 @@ bool llama_kv_cache_unified::seq_rm(llama_seq_id seq_id, llama_pos p0, llama_pos
         p1 = std::numeric_limits<llama_pos>::max();
     }
 
-    for (uint32_t i = 0; i < cells.size(); ++i) {
-        if (!cells.pos_in(i, p0, p1)) {
-            continue;
+    if (seq_id >= 0) {
+        for (uint32_t i = 0; i < cells.size(); ++i) {
+            if (!cells.pos_in(i, p0, p1)) {
+                continue;
+            }
+
+            if (cells.seq_has(i, seq_id) && cells.seq_rm(i, seq_id)) {
+                if (new_head == cells.size()) {
+                    new_head = i;
+                }
+            }
         }
+    } else {
+        // match any sequence
+        for (uint32_t i = 0; i < cells.size(); ++i) {
+            if (!cells.pos_in(i, p0, p1)) {
+                continue;
+            }
+
+            cells.rm(i);
 
-        if (cells.seq_has(i, seq_id) && cells.seq_rm(i, seq_id)) {
             if (new_head == cells.size()) {
                 new_head = i;
             }
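
For context, the semantics this hunk establishes can be illustrated with a minimal standalone sketch (simplified stand-ins, not the actual llama.cpp types): a non-negative seq_id only detaches that sequence from the matching cells, while a negative seq_id matches any sequence and frees every cell whose position falls in the affected range (the sketch uses the half-open [p0, p1) convention).

    #include <cstdint>
    #include <cstdio>
    #include <set>
    #include <vector>

    // simplified stand-in for a KV cache cell
    struct cell {
        int32_t pos = -1;          // -1 == empty slot
        std::set<int32_t> seqs;    // sequences referencing this cell
    };

    // sketch of the fixed behavior: seq_id < 0 means "match any sequence"
    static void seq_rm(std::vector<cell> & cells, int32_t seq_id, int32_t p0, int32_t p1) {
        for (auto & c : cells) {
            if (c.pos < p0 || c.pos >= p1) {
                continue;
            }
            if (seq_id >= 0) {
                // detach only this sequence; free the cell once no sequence uses it
                c.seqs.erase(seq_id);
                if (c.seqs.empty()) {
                    c.pos = -1;
                }
            } else {
                // match any sequence: free the cell unconditionally (mirrors cells.rm(i))
                c.seqs.clear();
                c.pos = -1;
            }
        }
    }

    int main() {
        std::vector<cell> cells(4);
        for (int32_t i = 0; i < 4; ++i) {
            cells[i].pos  = i;
            cells[i].seqs = {0, 1};
        }
        seq_rm(cells, -1, 1, 3); // drop positions 1 and 2 for all sequences
        for (const auto & c : cells) {
            std::printf("pos=%d nseq=%zu\n", c.pos, c.seqs.size());
        }
    }

In the sketch, the call with seq_id == -1 frees both matching cells regardless of which sequences reference them, mirroring the new "match any sequence" else branch in the hunk above.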