ggml : update softmax n_task calculation (#5126)
author    snadampal <redacted>
Fri, 26 Jan 2024 17:17:59 +0000 (11:17 -0600)
committer GitHub <redacted>
Fri, 26 Jan 2024 17:17:59 +0000 (19:17 +0200)
Updated the n_tasks calculation to use the maximum
number of threads possible. This improved the prompt eval
performance by around 5% for DOT kernels and by
around 10% for MMLA kernels on AWS Graviton3.

ggml.c

diff --git a/ggml.c b/ggml.c
index ca98fde8ab2398974d578ed6e8f4350ed7cbf0ce..ef6fd8caf1bcd9a5eb474b8c27e0807d5a677ca2 100644 (file)
--- a/ggml.c
+++ b/ggml.c
@@ -16597,7 +16597,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
             } break;
         case GGML_OP_SOFT_MAX:
             {
-                n_tasks = MIN(MIN(4, n_threads), ggml_nrows(node->src[0]));
+                n_tasks = MIN(n_threads, ggml_nrows(node->src[0]));
             } break;
         case GGML_OP_CONV_TRANSPOSE_1D:
             {