// the largest pos_min required for a checkpoint to be useful
const auto pos_min_thold = std::max(0, n_past - n_swa);
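// (illustration, not part of the original source: with n_past = 8192 and
//  n_swa = 4096, reusing the first 8192 tokens requires the cache to still
//  hold positions 4096..8191, so a useful checkpoint must have
//  pos_min <= pos_min_thold = 4096)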
- if (n_past > 0 && n_past < slot.prompt.n_tokens()) {
+ // note: disallow with mtmd contexts for now
+ // https://github.com/ggml-org/llama.cpp/issues/17043
+ if (!mctx && n_past > 0 && n_past < slot.prompt.n_tokens()) {
    const auto pos_min = llama_memory_seq_pos_min(llama_get_memory(ctx), slot.id);
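    // llama_memory_seq_pos_min() returns -1 when the sequence is empty,
    // which should not happen here since n_past > 0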
    if (pos_min == -1) {
        SLT_ERR(slot, "n_past = %d, slot.prompt.tokens.size() = %d, seq_id = %d, pos_min = %d\n", n_past, (int) slot.prompt.tokens.size(), slot.id, pos_min);