git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
server : fix processing of multiple back-to-back mtmd chunks (#21107)
author Georgi Gerganov <redacted>
Sat, 28 Mar 2026 14:27:36 +0000 (16:27 +0200)
committer GitHub <redacted>
Sat, 28 Mar 2026 14:27:36 +0000 (16:27 +0200)
tools/server/server-context.cpp

index b79a5270b52af28f61786d5c793c7c14a25ebadb..6f737d94d0200bbbbf83b180121bd81e6d5f84fd 100644 (file)
@@ -2493,7 +2493,7 @@ private:
                     bool has_mtmd = false;
 
                     // check if we should process the image
-                    if (slot.prompt.n_tokens() < slot.task->n_tokens() && input_tokens[slot.prompt.n_tokens()] == LLAMA_TOKEN_NULL) {
+                    while (slot.prompt.n_tokens() < slot.task->n_tokens() && input_tokens[slot.prompt.n_tokens()] == LLAMA_TOKEN_NULL) {
                         // process the image
                         size_t n_tokens_out = 0;
                         int32_t res = input_tokens.process_chunk(ctx, mctx, slot.prompt.n_tokens(), slot.prompt.tokens.pos_next(), slot.id, n_tokens_out);