From: Georgi Gerganov <redacted>
Date: Tue, 7 Oct 2025 05:24:17 +0000 (+0300)
Subject: memory : use sequential equal splits for recurrent modules (#16442)
X-Git-Tag: upstream/0.0.6764~60
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=0123ff38f53d34752f29239a29d0e40a6dc4110f;p=pkg%2Fggml%2Fsources%2Fllama.cpp

memory : use sequential equal splits for recurrent modules (#16442)
---

diff --git a/src/llama-memory-hybrid.cpp b/src/llama-memory-hybrid.cpp
index cb8832a35..dfb8439e0 100644
--- a/src/llama-memory-hybrid.cpp
+++ b/src/llama-memory-hybrid.cpp
@@ -73,7 +73,9 @@ llama_memory_context_ptr llama_memory_hybrid::init_batch(llama_batch_allocr & ba
                 // if all tokens are output, split by sequence
                 ubatch = balloc.split_seq(n_ubatch);
             } else {
-                ubatch = balloc.split_equal(n_ubatch, false);
+                // TODO: non-sequential equal split can be done if using unified KV cache
+                //       for simplicity, we always use sequential equal split for now
+                ubatch = balloc.split_equal(n_ubatch, true);
             }
 
             if (ubatch.n_tokens == 0) {
diff --git a/src/llama-memory-recurrent.cpp b/src/llama-memory-recurrent.cpp
index e23e74982..9402d9cb8 100644
--- a/src/llama-memory-recurrent.cpp
+++ b/src/llama-memory-recurrent.cpp
@@ -382,7 +382,9 @@ llama_memory_context_ptr llama_memory_recurrent::init_batch(llama_batch_allocr &
                 // if all tokens are output, split by sequence
                 ubatch = balloc.split_seq(n_ubatch);
             } else {
-                ubatch = balloc.split_equal(n_ubatch, false);
+                // TODO: non-sequential equal split can be done if using unified KV cache
+                //       for simplicity, we always use sequential equal split for now
+                ubatch = balloc.split_equal(n_ubatch, true);
             }
 
             if (ubatch.n_tokens == 0) {