completion : simplify batch (embd) processing (#19286)

author Daniel Bevenius <redacted>

Wed, 4 Feb 2026 04:43:28 +0000 (05:43 +0100)

committer GitHub <redacted>

Wed, 4 Feb 2026 04:43:28 +0000 (05:43 +0100)
author Daniel Bevenius <redacted>
Wed, 4 Feb 2026 04:43:28 +0000 (05:43 +0100)
committer GitHub <redacted>
Wed, 4 Feb 2026 04:43:28 +0000 (05:43 +0100)
diff --git a/tools/completion/completion.cpp b/tools/completion/completion.cpp

index f368a2f4c65c8a90524fbebb61190d086bd0803d..977132756f7f9bb084782a9665610832fd2f06d8 100644 (file)
--- a/tools/completion/completion.cpp
+++ b/tools/completion/completion.cpp
@@ -674,15 +674,12 @@ int main(int argc, char ** argv) {
                  }
              }
  
-            for (int i = 0; i < (int) embd.size(); i += params.n_batch) {
-                int n_eval = (int) embd.size() - i;
-                if (n_eval > params.n_batch) {
-                    n_eval = params.n_batch;
-                }
-
+            if (!embd.empty()) {
+                int n_eval = (int) embd.size();
                  LOG_DBG("eval: %s\n", string_from(ctx, embd).c_str());
  
-                if (llama_decode(ctx, llama_batch_get_one(&embd[i], n_eval))) {
+                GGML_ASSERT(n_eval <= params.n_batch);
+                if (llama_decode(ctx, llama_batch_get_one(embd.data(), n_eval))) {
                      LOG_ERR("%s : failed to eval\n", __func__);
                      return 1;
                  }
@@ -743,7 +740,7 @@ int main(int argc, char ** argv) {
                  common_sampler_accept(smpl, embd_inp[n_consumed], /* accept_grammar= */ false);
  
                  ++n_consumed;
-                if ((int) embd.size() >= params.n_batch) {
+                if ((int) embd.size() == params.n_batch) {
                      break;
                  }
              }
author	Daniel Bevenius <redacted>
	Wed, 4 Feb 2026 04:43:28 +0000 (05:43 +0100)
committer	GitHub <redacted>
	Wed, 4 Feb 2026 04:43:28 +0000 (05:43 +0100)