git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
llama : fix compatibility with old 2 expert models (#6735)
author	slaren <redacted>
Thu, 18 Apr 2024 07:04:47 +0000 (09:04 +0200)
committer	GitHub <redacted>
Thu, 18 Apr 2024 07:04:47 +0000 (10:04 +0300)
llama.cpp

index f4f4063cf6062fb2b72b6d3add061489064da126..8c1446296fe35d63d63bb9c7c5777009d8e8d8a4 100644 (file)
--- a/llama.cpp
+++ b/llama.cpp
@@ -4592,7 +4592,7 @@ static bool llm_load_tensors(
     size_t ctx_size = ggml_tensor_overhead()*(ml.n_tensors + 1); // +1 for models where tok_embd is duplicated as output
 
     // for moe merged tensors
-    ctx_size += ggml_tensor_overhead()*hparams.n_expert*n_layer;
+    ctx_size += ggml_tensor_overhead()*n_layer*3;
 
     std::map<ggml_backend_buffer_type_t, ggml_context *> ctx_map;
     for (auto & it : buft_layer_count) {