From: Xuan-Son Nguyen
Date: Wed, 4 Jun 2025 08:11:26 +0000 (+0200)
Subject: llama-graph : use ggml_repeat_4d (#13998)
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=3ac67535c86e2fc43e4eddf594412acc370bbb04;p=pkg%2Fggml%2Fsources%2Fllama.cpp

llama-graph : use ggml_repeat_4d (#13998)
---

diff --git a/src/llama-graph.cpp b/src/llama-graph.cpp
index 727e119e..c4bdd660 100644
--- a/src/llama-graph.cpp
+++ b/src/llama-graph.cpp
@@ -769,9 +769,8 @@ ggml_tensor * llm_graph_context::build_moe_ffn(
     cur = ggml_reshape_3d(ctx0, cur, n_embd, 1, n_tokens);
 
     if (weight_before_ffn) {
-        // TODO: this is a workaround as we don't yet have a repeat op that takes custom dim (ggml_repeat_4d)
-        ggml_tensor * repeated = ggml_new_tensor_3d(ctx0, cur->type, n_embd, n_expert_used, n_tokens);
-        repeated = ggml_repeat(ctx0, cur, repeated); // [n_embd, n_expert_used, n_tokens]
+        // repeat cur to [n_embd, n_expert_used, n_tokens]
+        ggml_tensor * repeated = ggml_repeat_4d(ctx0, cur, n_embd, n_expert_used, n_tokens, 1);
         cur = ggml_mul(ctx0, repeated, weights);
         cb(cur, "ffn_moe_weighted", il);
     }