context : fix reorder logic (#13267)

author Georgi Gerganov <redacted>

Fri, 2 May 2025 17:54:13 +0000 (20:54 +0300)

committer GitHub <redacted>

Fri, 2 May 2025 17:54:13 +0000 (20:54 +0300)
author Georgi Gerganov <redacted>
Fri, 2 May 2025 17:54:13 +0000 (20:54 +0300)
committer GitHub <redacted>
Fri, 2 May 2025 17:54:13 +0000 (20:54 +0300)
diff --git a/src/llama-context.cpp b/src/llama-context.cpp

index a88b9a5ff90da607fc52dcc509f8ca4cb736e9ab..45591be992d8788303fc2f164984040ce103fff3 100644 (file)
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@@ -1050,6 +1050,9 @@ int llama_context::decode(llama_batch & inp_batch) {
      // finalize the batch processing
      kv_guard.commit();
  
+    // set to total number of outputs in the batch, for use in llama_get_logits_ith
+    n_outputs = n_outputs_all;
+
      // set output mappings
      {
          bool sorted_output = true;
@@ -1103,9 +1106,6 @@ int llama_context::decode(llama_batch & inp_batch) {
          }
      }
  
-    // set to total number of outputs in the batch, for use in llama_get_logits_ith
-    n_outputs = n_outputs_all;
-
      // wait for the computation to finish (automatically done when obtaining the model output)
      //synchronize();
author	Georgi Gerganov <redacted>
	Fri, 2 May 2025 17:54:13 +0000 (20:54 +0300)
committer	GitHub <redacted>
	Fri, 2 May 2025 17:54:13 +0000 (20:54 +0300)