continue;
}
- if (memory) {
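+ // p0: the last position stored in the memory module (i.e. the KV cache) for sequence s,
+ // or -1 when there is no memory module or the sequence is empty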
+ const llama_pos p0 = memory ? memory->seq_pos_max(s) : -1;
+
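+ // a negative p0 means there is no past context to be consecutive with, so the check is skipped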
+ if (p0 >= 0) {
bool ok = true;
if (batch.token) {
- if (seq_pos_min(s) != memory->seq_pos_max(s) + 1) {
+ if (seq_pos_min(s) != p0 + 1) {
ok = false;
}
} else {
// for embeddings (typically used as vision input), we allow them to have repeating positions
// ref: https://github.com/ggml-org/llama.cpp/issues/13694#issuecomment-2983871762
- if (seq_pos_min(s) != memory->seq_pos_max(s) && seq_pos_min(s) != memory->seq_pos_max(s) + 1) {
+ if (seq_pos_min(s) != p0 && seq_pos_min(s) != p0 + 1) {
ok = false;
}
}
" - the last position stored in the memory module of the context (i.e. the KV cache) for sequence %d is X = %d\n"
" - the tokens for sequence %d in the input batch have a starting position of Y = %d\n"
" it is required that the sequence positions remain consecutive: Y = X + 1\n",
- __func__, s, s, memory->seq_pos_max(s), s, seq_pos_min(s));
+ __func__, s, s, p0, s, seq_pos_min(s));
return false;
}