git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
llama : fix compatibility with old 2 expert models (#6735)
author	slaren <redacted>
Thu, 18 Apr 2024 07:04:47 +0000 (09:04 +0200)
committer	GitHub <redacted>
Thu, 18 Apr 2024 07:04:47 +0000 (10:04 +0300)
llama.cpp

index f4f4063cf6062fb2b72b6d3add061489064da126..8c1446296fe35d63d63bb9c7c5777009d8e8d8a4 100644 (file)
--- a/llama.cpp
+++ b/llama.cpp
@@ -4592,7 +4592,7 @@ static bool llm_load_tensors(
     size_t ctx_size = ggml_tensor_overhead()*(ml.n_tensors + 1); // +1 for models where tok_embd is duplicated as output
 
     // for moe merged tensors
-    ctx_size += ggml_tensor_overhead()*hparams.n_expert*n_layer;
+    ctx_size += ggml_tensor_overhead()*n_layer*3;
 
     std::map<ggml_backend_buffer_type_t, ggml_context *> ctx_map;
     for (auto & it : buft_layer_count) {