git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
memory : fix broken batch splits for recurrent cache (#14575)
author: compilade <redacted>
Tue, 8 Jul 2025 15:37:47 +0000 (11:37 -0400)
committer: GitHub <redacted>
Tue, 8 Jul 2025 15:37:47 +0000 (18:37 +0300)
Splits producing more than one ubatch per batch for recurrent models
were broken with #14512.

This fixes it by moving the completeness check after the ubatch split loop.

src/llama-memory-recurrent.cpp

index 4b90dac7a327cf4a0c29396b3a3215a1e2ad84d0..a1b5b1a272cc09d02db4ecd7f5aa0686640f38ed 100644 (file)
@@ -377,14 +377,18 @@ llama_memory_context_ptr llama_memory_recurrent::init_batch(llama_batch_allocr &
                 ubatch = balloc.split_equal(n_ubatch, false);
             }
 
-            if (balloc.get_n_used() < balloc.get_n_tokens()) {
-                // failed to find a suitable split
+            if (ubatch.n_tokens == 0) {
                 break;
             }
 
             ubatches.push_back(std::move(ubatch)); // NOLINT
         }
 
+        if (balloc.get_n_used() < balloc.get_n_tokens()) {
+            // failed to find a suitable split
+            break;
+        }
+
         if (!prepare(ubatches)) {
             break;
         }