* llama : model-based max number of graph nodes calculation
* Update src/llama.cpp
---------
Co-authored-by: slaren <redacted>
// Map from an integer id to a backend buffer.
// NOTE(review): key semantics are not visible in this chunk — presumably a
// device / buffer-type index used during model loading; confirm at call sites.
using llama_buf_map = std::unordered_map<uint32_t, ggml_backend_buffer_t>;
-// TODO: update when needed or think of some clever automatic way to do this
-static size_t llama_model_max_nodes(const llama_model & /*model*/) {
- //if (model.arch == LLM_ARCH_LLAMA && model.hparams.n_layer > ??) { // llama-3 405B
- // return 32768;
- //}
-
- return 8192;
+// Upper bound on the number of nodes in this model's compute graph.
+// Scales with the model's tensor count (factor 5 — presumably an empirical
+// nodes-per-tensor estimate; confirm against the graph-build code), and
+// never drops below the previous fixed default of 8192, so small models
+// keep the old behavior.
+static size_t llama_model_max_nodes(const llama_model & model) {
+ return std::max<size_t>(8192, model.tensors_by_name.size()*5);
 }
struct llama_model_loader {