From: slaren Date: Thu, 18 Apr 2024 07:04:47 +0000 (+0200) Subject: llama : fix compatibility with old 2 expert models (#6735) X-Git-Tag: upstream/0.0.4488~1796 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=c71bfd736ee99a56e697697b39240f2ee06ed26d;p=pkg%2Fggml%2Fsources%2Fllama.cpp llama : fix compatibility with old 2 expert models (#6735) --- diff --git a/llama.cpp b/llama.cpp index f4f4063c..8c144629 100644 --- a/llama.cpp +++ b/llama.cpp @@ -4592,7 +4592,7 @@ static bool llm_load_tensors( size_t ctx_size = ggml_tensor_overhead()*(ml.n_tensors + 1); // +1 for models where tok_embd is duplicated as output // for moe merged tensors - ctx_size += ggml_tensor_overhead()*hparams.n_expert*n_layer; + ctx_size += ggml_tensor_overhead()*n_layer*3; std::map<ggml_backend_buffer_type_t, ggml_context *> ctx_map; for (auto & it : buft_layer_count) {