From: slaren
Date: Thu, 8 Feb 2024 20:33:03 +0000 (+0100)
Subject: llama : do not print "offloading layers" message in CPU-only builds (#5416)
X-Git-Tag: upstream/0.0.4488~2383
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=41f308f58edc2a04bcf9e245100b0a9b10e9a0fb;p=pkg%2Fggml%2Fsources%2Fllama.cpp

llama : do not print "offloading layers" message in CPU-only builds (#5416)
---

diff --git a/llama.cpp b/llama.cpp
index 89acafbc..db7d1c1c 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -4209,8 +4209,7 @@ static bool llm_load_tensors(
         ctx_bufs.emplace_back(ctx, buf);
     }
 
-    // print memory requirements
-    {
+    if (llama_supports_gpu_offload()) {
         const int n_gpu = std::min(n_gpu_layers, int(hparams.n_layer));
 
         LLAMA_LOG_INFO("%s: offloading %d repeating layers to GPU\n", __func__, n_gpu);
@@ -4222,10 +4221,11 @@ static bool llm_load_tensors(
         const int max_offloadable_layers = hparams.n_layer + 1;
 
         LLAMA_LOG_INFO("%s: offloaded %d/%d layers to GPU\n", __func__, std::min(n_gpu_layers, max_offloadable_layers), max_backend_supported_layers);
+    }
 
-        for (ggml_backend_buffer_t buf : model.bufs) {
-            LLAMA_LOG_INFO("%s: %10s buffer size = %8.2f MiB\n", __func__, ggml_backend_buffer_name(buf), ggml_backend_buffer_get_size(buf) / 1024.0 / 1024.0);
-        }
+    // print memory requirements
+    for (ggml_backend_buffer_t buf : model.bufs) {
+        LLAMA_LOG_INFO("%s: %10s buffer size = %8.2f MiB\n", __func__, ggml_backend_buffer_name(buf), ggml_backend_buffer_get_size(buf) / 1024.0 / 1024.0);
     }
 
     // populate tensors_by_name
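
A minimal caller-side sketch (not part of this commit): the same llama_supports_gpu_offload() check that now gates the log message can also gate how many layers an application requests. make_model_params() is a hypothetical helper; llama_supports_gpu_offload(), llama_model_default_params() and the n_gpu_layers field come from llama.h.

    #include "llama.h"

    // Hypothetical helper: only request GPU layers when the build can offload.
    static struct llama_model_params make_model_params(int requested_gpu_layers) {
        struct llama_model_params params = llama_model_default_params();
        // In CPU-only builds llama_supports_gpu_offload() returns false,
        // so n_gpu_layers stays at its default and no offload is requested
        // (and, after this commit, no "offloading layers" message is printed).
        if (llama_supports_gpu_offload()) {
            params.n_gpu_layers = requested_gpu_layers;
        }
        return params;
    }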