server : do not create checkpoints right after mtmd chunks (#20232)

author Georgi Gerganov <redacted>

Sun, 8 Mar 2026 20:16:46 +0000 (22:16 +0200)

committer GitHub <redacted>

Sun, 8 Mar 2026 20:16:46 +0000 (22:16 +0200)
author Georgi Gerganov <redacted>
Sun, 8 Mar 2026 20:16:46 +0000 (22:16 +0200)
committer GitHub <redacted>
Sun, 8 Mar 2026 20:16:46 +0000 (22:16 +0200)
diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp

index 9dbd6d798a3d214aebbdb29f765f1a09d17b1a9b..3790308d0eac4610f93aa985d0fc0e5e3b14c50a 100644 (file)
--- a/tools/server/server-context.cpp
+++ b/tools/server/server-context.cpp
@@ -2438,6 +2438,8 @@ private:
                          slot.n_prompt_tokens_cache = 0;
                      }
  
+                    bool do_checkpoint = params_base.n_ctx_checkpoints > 0;
+
                      // check if we should process the image
                      if (slot.prompt.n_tokens() < slot.task->n_tokens() && input_tokens[slot.prompt.n_tokens()] == LLAMA_TOKEN_NULL) {
                          // process the image
@@ -2457,6 +2459,8 @@ private:
                              const auto & chunk = input_tokens.find_chunk(slot.prompt.n_tokens());
                              slot.prompt.tokens.push_back(chunk.get()); // copy
                          }
+
+                        do_checkpoint = false; // do not checkpoint right after an image chunk
                      }
  
                      // If using an alora, there may be uncached tokens that come
@@ -2473,8 +2477,6 @@ private:
                          alora_disabled_id = enabled_loras[0];
                      }
  
-                    bool do_checkpoint = params_base.n_ctx_checkpoints > 0;
-
                      // make checkpoints only for completion tasks
                      do_checkpoint = do_checkpoint && slot.task->type == SERVER_TASK_TYPE_COMPLETION;
author	Georgi Gerganov <redacted>
	Sun, 8 Mar 2026 20:16:46 +0000 (22:16 +0200)
committer	GitHub <redacted>
	Sun, 8 Mar 2026 20:16:46 +0000 (22:16 +0200)