context : fix pos_min initialization upon decode error (#14008)

author Georgi Gerganov <redacted>

Thu, 5 Jun 2025 06:06:29 +0000 (09:06 +0300)

committer GitHub <redacted>

Thu, 5 Jun 2025 06:06:29 +0000 (09:06 +0300)
author Georgi Gerganov <redacted>
Thu, 5 Jun 2025 06:06:29 +0000 (09:06 +0300)
committer GitHub <redacted>
Thu, 5 Jun 2025 06:06:29 +0000 (09:06 +0300)
diff --git a/src/llama-context.cpp b/src/llama-context.cpp

index 7c1a642c19464f62b940bf9492a37dec0271e262..f1b43b9ccaaafa95abf68173972e42db59b3012f 100644 (file)
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@@ -1058,7 +1058,10 @@ int llama_context::decode(llama_batch & inp_batch) {
  
          if (!res) {
              // the last ubatch failed or was aborted -> remove all positions of that ubatch from the KV cache
-            llama_pos pos_min[LLAMA_MAX_PARALLEL_SEQUENCES] = { std::numeric_limits<llama_pos>::max() };
+            llama_pos pos_min[LLAMA_MAX_PARALLEL_SEQUENCES];
+            for (int s = 0; s < LLAMA_MAX_PARALLEL_SEQUENCES; ++s) {
+                pos_min[s] = std::numeric_limits<llama_pos>::max();
+            }
  
              for (uint32_t i = 0; i < ubatch.n_tokens; ++i) {
                  const auto & seq_id = ubatch.seq_id[i][0];
author	Georgi Gerganov <redacted>
	Thu, 5 Jun 2025 06:06:29 +0000 (09:06 +0300)
committer	GitHub <redacted>
	Thu, 5 Jun 2025 06:06:29 +0000 (09:06 +0300)