From: Georgi Gerganov
Date: Sat, 28 Mar 2026 14:27:36 +0000 (+0200)
Subject: server : fix processing of multiple back-to-back mtmd chunks (#21107)
X-Git-Tag: upstream/0.0.8611~42
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=edfb440a2f469ea3abaf397627bdc7d40b7777d8;p=pkg%2Fggml%2Fsources%2Fllama.cpp

server : fix processing of multiple back-to-back mtmd chunks (#21107)
---

diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp
index b79a5270b..6f737d94d 100644
--- a/tools/server/server-context.cpp
+++ b/tools/server/server-context.cpp
@@ -2493,7 +2493,7 @@ private:
                     bool has_mtmd = false;
 
                     // check if we should process the image
-                    if (slot.prompt.n_tokens() < slot.task->n_tokens() && input_tokens[slot.prompt.n_tokens()] == LLAMA_TOKEN_NULL) {
+                    while (slot.prompt.n_tokens() < slot.task->n_tokens() && input_tokens[slot.prompt.n_tokens()] == LLAMA_TOKEN_NULL) {
                         // process the image
                         size_t n_tokens_out = 0;
                         int32_t res = input_tokens.process_chunk(ctx, mctx, slot.prompt.n_tokens(), slot.prompt.tokens.pos_next(), slot.id, n_tokens_out);