string_format("size of the prompt context (default: %d, 0 = loaded from model)", params.n_ctx),
[](common_params & params, int value) {
params.n_ctx = value;
+ if (value == 0) {
+ // disable context reduction in llama_params_fit if the user explicitly requests the full context size:
+ params.fit_params_min_ctx = UINT32_MAX;
+ }
}
).set_env("LLAMA_ARG_CTX_SIZE"));
add_opt(common_arg(
// - returns a status indicating whether the parameters could be successfully modified to fit device memory
// - this function is NOT thread safe because it modifies the global llama logger state
// - only parameters that have the same value as in llama_default_model_params are modified
+ // with the exception of the context size, which is modified if and only if it is equal to 0
LLAMA_API enum llama_params_fit_status llama_params_fit(
const char * path_model,
struct llama_model_params * mparams,
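The modification policy in the comment block above can be illustrated stand-alone; the struct, defaults, and helpers below are hypothetical, only the policy itself comes from the comments:

#include <cstdint>

struct fit_policy_sketch {  // hypothetical stand-in for the fitted parameters
    uint32_t n_ctx;         // 0 = loaded from model
    int32_t  n_gpu_layers;  // example of a parameter with a llama default
};

// hypothetical stand-in for the llama default parameters
static const fit_policy_sketch k_defaults = { 0, -1 };

// a regular parameter may be auto-fitted only if the user left it at its default value ...
static bool may_fit_n_gpu_layers(const fit_policy_sketch & p) {
    return p.n_gpu_layers == k_defaults.n_gpu_layers;
}

// ... while the context size is fitted if and only if it is equal to 0
static bool may_fit_n_ctx(const fit_policy_sketch & p) {
    return p.n_ctx == 0;
}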
__func__, hp_nct, cparams->n_ctx, memory_reduction/MiB);
}
} else {
- LLAMA_LOG_INFO("%s: default model context size is %" PRIu32 " which is <= the min. context size of %" PRIu32 " -> no change\n",
- __func__, hp_nct, n_ctx_min);
+ if (n_ctx_min == UINT32_MAX) {
+ LLAMA_LOG_INFO("%s: user has requested full context size of %" PRIu32 " -> no change\n", __func__, hp_nct);
+ } else {
+ LLAMA_LOG_INFO("%s: default model context size is %" PRIu32 " which is <= the min. context size of %" PRIu32 " -> no change\n",
+ __func__, hp_nct, n_ctx_min);
+ }
}
} else {
LLAMA_LOG_INFO("%s: context size set by user to %" PRIu32 " -> no change\n", __func__, cparams->n_ctx);
LOG_INF("%s: printing fitted CLI arguments to stdout...\n", __func__);
common_log_flush(common_log_main());
- printf("-c %" PRIu32 " -ngl %" PRIu32, cparams.n_ctx, mparams.n_gpu_layers);
+ printf("-c %" PRIu32 " -ngl %" PRIi32, cparams.n_ctx, mparams.n_gpu_layers);
size_t nd = llama_max_devices();
while (nd > 1 && mparams.tensor_split[nd - 1] == 0.0f) {
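The trimming loop is cut off here. As a hedged guess at the complete pattern, assuming the tool appends a -ts argument only when more than one device receives a nonzero share (the loop body and the printing code are assumptions, not the PR's code):

#include <cstddef>
#include <cstdio>

static void print_tensor_split(const float * tensor_split, size_t n_devices) {
    size_t nd = n_devices;
    while (nd > 1 && tensor_split[nd - 1] == 0.0f) {
        nd--; // drop trailing devices with a zero share
    }
    if (nd > 1) {
        printf(" -ts ");
        for (size_t i = 0; i < nd; i++) {
            printf(i == 0 ? "%g" : ",%g", (double) tensor_split[i]);
        }
    }
}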