git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
server : fix "can batch with" bug (#17263)
author: Georgi Gerganov <redacted>
Fri, 14 Nov 2025 12:03:45 +0000 (14:03 +0200)
committer: GitHub <redacted>
Fri, 14 Nov 2025 12:03:45 +0000 (14:03 +0200)
tools/server/server.cpp

index 7dbd8b6a002d19520416f0396b9fcecb17f746cd..535d2c450e21eb64704de91b3e1bdf9452e6c087 100644 (file)
@@ -3591,13 +3591,13 @@ struct server_context {
         // next, batch any pending prompts without exceeding n_batch
         if (params_base.cont_batching || batch.n_tokens == 0) {
             for (auto & slot : slots) {
+                if (!slot.is_processing()) {
+                    continue;
+                }
+
                 // check if we can batch this slot with the previous one
-                if (slot.is_processing()) {
-                    if (!slot_batched) {
-                        slot_batched = &slot;
-                    } else if (!slot_batched->can_batch_with(slot)) {
-                        continue;
-                    }
+                if (slot_batched && !slot_batched->can_batch_with(slot)) {
+                    continue;
                 }
 
                 // this slot still has a prompt to be processed
@@ -4028,6 +4028,10 @@ struct server_context {
                     }
                 }
 
+                if (!slot_batched) {
+                    slot_batched = &slot;
+                }
+
                 if (batch.n_tokens >= n_batch) {
                     break;
                 }