From: Johannes Gäßler Date: Sat, 27 Dec 2025 19:20:45 +0000 (+0100) Subject: llama-fit-params: fix overflow check (#18354) X-Git-Tag: upstream/0.0.7599~45 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=a4bf35889eda36d3597cd0f8f333f5b8a2fcaefc;p=pkg%2Fggml%2Fsources%2Fllama.cpp llama-fit-params: fix overflow check (#18354) --- diff --git a/src/llama.cpp b/src/llama.cpp index c53f2472..93a9c408 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -389,8 +389,8 @@ static void llama_params_fit_impl( tensor_buft_overrides[itbo].buft = nullptr; itbo++; mparams.tensor_buft_overrides = tensor_buft_overrides; - throw llama_params_fit_exception("llama_params_fit_n_tensor_buft_overrides() == " - + std::to_string(ntbo) + " is insufficient for model\n"); + throw llama_params_fit_exception("llama_max_tensor_buft_overrides() == " + + std::to_string(ntbo) + " is insufficient for model"); } tensor_buft_overrides[itbo].pattern = get_overflow_pattern(il, il == il0 ? ngl_per_device[id].overflow_type : LAYER_FRACTION_MOE); tensor_buft_overrides[itbo].buft = overflow_bufts[id]; @@ -647,7 +647,7 @@ static void llama_params_fit_impl( ngl_per_device_test[id].overflow_type = LAYER_FRACTION_UP; LLAMA_LOG_DEBUG("%s: trying to fit one extra layer with overflow_type=LAYER_FRACTION_UP\n", __func__); std::vector mem_test = get_memory_for_layers(__func__, ngl_per_device_test, overflow_bufts); - if (mem_test[id] < targets[id]) { + if (mem_test[id] < targets[id] && (id + 1 == nd || mem_test[id + 1] < targets[id + 1])) { ngl_per_device = ngl_per_device_test; mem = mem_test; id_dense_start = id_dense_start_test; @@ -657,7 +657,7 @@ static void llama_params_fit_impl( ngl_per_device_test[id].overflow_type = LAYER_FRACTION_GATE; LLAMA_LOG_DEBUG("%s: trying to fit one extra layer with overflow_type=LAYER_FRACTION_GATE\n", __func__); mem_test = get_memory_for_layers(__func__, ngl_per_device_test, overflow_bufts); - if (mem_test[id] < targets[id]) { + if (mem_test[id] < targets[id] && (id + 1 == nd || mem_test[id + 1] < targets[id + 1])) { ngl_per_device = ngl_per_device_test; mem = mem_test; id_dense_start = id_dense_start_test; @@ -668,7 +668,7 @@ static void llama_params_fit_impl( ngl_per_device_test[id].overflow_type = LAYER_FRACTION_ATTN; LLAMA_LOG_DEBUG("%s: trying to fit one extra layer with overflow_type=LAYER_FRACTION_ATTN\n", __func__); mem_test = get_memory_for_layers(__func__, ngl_per_device_test, overflow_bufts); - if (mem_test[id] < targets[id]) { + if (mem_test[id] < targets[id] && (id + 1 == nd || mem_test[id + 1] < targets[id + 1])) { ngl_per_device = ngl_per_device_test; mem = mem_test; id_dense_start = id_dense_start_test;