git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
memory : use sequential equal splits for recurrent modules (#16442)
author: Georgi Gerganov <redacted>
Tue, 7 Oct 2025 05:24:17 +0000 (08:24 +0300)
committer: GitHub <redacted>
Tue, 7 Oct 2025 05:24:17 +0000 (08:24 +0300)
src/llama-memory-hybrid.cpp
src/llama-memory-recurrent.cpp

index cb8832a353b11c4a4841e44cf823e0e952e60432..dfb8439e01bdfb53fd2ac2a270d6534a7fac7a60 100644 (file)
@@ -73,7 +73,9 @@ llama_memory_context_ptr llama_memory_hybrid::init_batch(llama_batch_allocr & ba
                 // if all tokens are output, split by sequence
                 ubatch = balloc.split_seq(n_ubatch);
             } else {
-                ubatch = balloc.split_equal(n_ubatch, false);
+                // TODO: non-sequential equal split can be done if using unified KV cache
+                //       for simplicity, we always use sequential equal split for now
+                ubatch = balloc.split_equal(n_ubatch, true);
             }
 
             if (ubatch.n_tokens == 0) {
index e23e74982b2786b909b7b4afb5b1bd920913d3d5..9402d9cb8df9fbe46506d6e813a3949760b7ae31 100644 (file)
@@ -382,7 +382,9 @@ llama_memory_context_ptr llama_memory_recurrent::init_batch(llama_batch_allocr &
                 // if all tokens are output, split by sequence
                 ubatch = balloc.split_seq(n_ubatch);
             } else {
-                ubatch = balloc.split_equal(n_ubatch, false);
+                // TODO: non-sequential equal split can be done if using unified KV cache
+                //       for simplicity, we always use sequential equal split for now
+                ubatch = balloc.split_equal(n_ubatch, true);
             }
 
             if (ubatch.n_tokens == 0) {