From: Aman Gupta
Date: Mon, 30 Mar 2026 09:40:17 +0000 (+0800)
Subject: llama-model-loader: print warning when using overrides with mmap (#20978)
X-Git-Tag: upstream/0.0.8611~28
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=278521c33a11b89d9d7ed2afe5c20502840816b1;p=pkg%2Fggml%2Fsources%2Fllama.cpp

llama-model-loader: print warning when using overrides with mmap (#20978)

* llama-model-loader: use pinned memory for tensor overrides

* change to warning
---

diff --git a/src/llama-model-loader.cpp b/src/llama-model-loader.cpp
index 2457a7ed4..3d549cae5 100644
--- a/src/llama-model-loader.cpp
+++ b/src/llama-model-loader.cpp
@@ -1158,6 +1158,12 @@ struct ggml_tensor * llama_model_loader::create_tensor(
                 if (overrides->buft == ggml_backend_cpu_buffer_type()) {
                     // when overriding to a CPU buffer, consider the extra buffer types
                     buft = select_weight_buft(hparams, t_meta, op, buft_list_cpu);
+                    if (use_mmap) {
+                        static std::once_flag once;
+                        std::call_once(once, [] {
+                            LLAMA_LOG_WARN("llama_model_loader: tensor overrides to CPU are used with mmap enabled - consider using --no-mmap for better performance\n");
+                        });
+                    }
                 } else {
                     buft = overrides->buft;
                 }