* llama : model-based max number of graph nodes calculation
* Update src/llama.cpp
---------
Co-authored-by: slaren <redacted>
// Map from an integer id to a backend buffer.
// NOTE(review): key semantics are not visible in this chunk — presumably a
// device / buffer-type index used during model loading; confirm at call sites.
using llama_buf_map = std::unordered_map<uint32_t, ggml_backend_buffer_t>;
-// TODO: update when needed or think of some clever automatic way to do this
-static size_t llama_model_max_nodes(const llama_model & /*model*/) {
- //if (model.arch == LLM_ARCH_LLAMA && model.hparams.n_layer > ??) { // llama-3 405B
- // return 32768;
- //}
-
- return 8192;
+// Upper bound on the number of nodes in this model's compute graph.
+// Scales with the model's tensor count (factor 5 — presumably an empirical
+// nodes-per-tensor estimate; confirm against the graph-build code), and
+// never drops below the previous fixed default of 8192, so small models
+// keep the old behavior.
+static size_t llama_model_max_nodes(const llama_model & model) {
+ return std::max<size_t>(8192, model.tensors_by_name.size()*5);
 }
struct llama_model_loader {