From: Georgi Gerganov
Date: Thu, 27 Nov 2025 14:34:13 +0000 (+0200)
Subject: arch : add description about LLM_TENSOR_INFOS (#17550)
X-Git-Tag: upstream/0.0.7446~268
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=c386114922dcc54a041ed99f855defda3fa7f225;p=pkg%2Fggml%2Fsources%2Fllama.cpp

arch : add description about LLM_TENSOR_INFOS (#17550)
---

diff --git a/src/llama-arch.cpp b/src/llama-arch.cpp
index 6da9e0a3..f6e26245 100644
--- a/src/llama-arch.cpp
+++ b/src/llama-arch.cpp
@@ -2487,6 +2487,16 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
     },
 };
 
+// declare information about the model weight tensors:
+// - the layer in which the tensor is going to be used. this is needed in order to assign the correct buffer type for the weight
+// - the operator which is going to use the weight. this is needed to determine if the respective backend supports the operator
+//
+// for example, input layers are usually assigned to CPU/host buffer types
+//
+// a mismatch between the declared information and the actual layer/op in which the tensor is used can lead to sub-optimal
+// assignment of the buffer types and extra overhead during computation
+// example: https://github.com/ggml-org/llama.cpp/pull/17548
+//
 static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
     {LLM_TENSOR_TOKEN_EMBD, {LLM_TENSOR_LAYER_INPUT, GGML_OP_GET_ROWS}},
     {LLM_TENSOR_POS_EMBD, {LLM_TENSOR_LAYER_INPUT, GGML_OP_GET_ROWS}},