git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
memory : use sequential equal splits for recurrent modules (#16442)
author: Georgi Gerganov <redacted>
Tue, 7 Oct 2025 05:24:17 +0000 (08:24 +0300)
committer: GitHub <redacted>
Tue, 7 Oct 2025 05:24:17 +0000 (08:24 +0300)
src/llama-memory-hybrid.cpp
src/llama-memory-recurrent.cpp

index cb8832a353b11c4a4841e44cf823e0e952e60432..dfb8439e01bdfb53fd2ac2a270d6534a7fac7a60 100644 (file)
@@ -73,7 +73,9 @@ llama_memory_context_ptr llama_memory_hybrid::init_batch(llama_batch_allocr & ba
                 // if all tokens are output, split by sequence
                 ubatch = balloc.split_seq(n_ubatch);
             } else {
-                ubatch = balloc.split_equal(n_ubatch, false);
+                // TODO: non-sequential equal split can be done if using unified KV cache
+                //       for simplicity, we always use sequential equal split for now
+                ubatch = balloc.split_equal(n_ubatch, true);
             }
 
             if (ubatch.n_tokens == 0) {
index e23e74982b2786b909b7b4afb5b1bd920913d3d5..9402d9cb8df9fbe46506d6e813a3949760b7ae31 100644 (file)
@@ -382,7 +382,9 @@ llama_memory_context_ptr llama_memory_recurrent::init_batch(llama_batch_allocr &
                 // if all tokens are output, split by sequence
                 ubatch = balloc.split_seq(n_ubatch);
             } else {
-                ubatch = balloc.split_equal(n_ubatch, false);
+                // TODO: non-sequential equal split can be done if using unified KV cache
+                //       for simplicity, we always use sequential equal split for now
+                ubatch = balloc.split_equal(n_ubatch, true);
             }
 
             if (ubatch.n_tokens == 0) {