From: Johannes Gäßler
Date: Tue, 16 Dec 2025 13:24:00 +0000 (+0100)
Subject: llama: fix early stop in params_fit if ctx is set (#18070)
X-Git-Tag: upstream/0.0.7446~9
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=ec98e20021f7611db3bbcf6bb6629fed6e1ce4f0;p=pkg%2Fggml%2Fsources%2Fllama.cpp

llama: fix early stop in params_fit if ctx is set (#18070)
---

diff --git a/src/llama.cpp b/src/llama.cpp
index 7ed34b80..f69964b6 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -241,6 +241,13 @@ static void llama_params_fit_impl(
                 global_surplus += memory_reduction;
                 LLAMA_LOG_INFO("%s: context size reduced from %" PRIu32 " to %" PRIu32 " -> need %" PRId64 " MiB less memory in total\n",
                     __func__, hp_nct, cparams->n_ctx, memory_reduction/MiB);
+                if (global_surplus >= 0) {
+                    if (nd == 1) {
+                        LLAMA_LOG_INFO("%s: entire model can be fit by reducing context\n", __func__);
+                        return;
+                    }
+                    LLAMA_LOG_INFO("%s: entire model should be fit across devices by reducing context\n", __func__);
+                }
             } else {
                 LLAMA_LOG_INFO("%s: default model context size is %" PRIu32 " which is <= the min. context size of %" PRIu32 " -> no change\n",
                     __func__, hp_nct, n_ctx_min);
@@ -249,10 +256,6 @@ static void llama_params_fit_impl(
             LLAMA_LOG_INFO("%s: context size set by user to %" PRIu32 " -> no change\n", __func__, cparams->n_ctx);
         }
     }
-    if (global_surplus >= 0) {
-        LLAMA_LOG_INFO("%s: entire model can be fit across devices by reducing context\n", __func__);
-        return;
-    }
 
     if (mparams->n_gpu_layers != default_mparams.n_gpu_layers) {