// NOTE(review): this excerpt appears to be in diff form - lines starting with '-' look like the
// old implementation being removed and lines starting with '+' like its replacement. Confirm
// which side is live before editing the code itself.

// Reshape cur to 3D with dims (n_embd, 1, n_tokens), so that the middle dim has size 1.
cur = ggml_reshape_3d(ctx0, cur, n_embd, 1, n_tokens);
// Only when weight_before_ffn is set: multiply cur by weights at this point.
// (Presumably this happens ahead of the expert FFN, as the flag name suggests - the rest of
// the function is not visible here; TODO confirm.)
if (weight_before_ffn) {
- // TODO: this is a workaround as we don't yet have a repeat op that takes custom dim (ggml_repeat_4d)
- ggml_tensor * repeated = ggml_new_tensor_3d(ctx0, cur->type, n_embd, n_expert_used, n_tokens);
- repeated = ggml_repeat(ctx0, cur, repeated); // [n_embd, n_expert_used, n_tokens]
+ // repeat cur to [n_embd, n_expert_used, n_tokens]
+ ggml_tensor * repeated = ggml_repeat_4d(ctx0, cur, n_embd, n_expert_used, n_tokens, 1);
// Multiply the repeated tensor by weights and store the result back into cur.
// Assumes weights is shape-compatible with [n_embd, n_expert_used, n_tokens] - its shape is
// not established in this excerpt; TODO confirm.
cur = ggml_mul(ctx0, repeated, weights);
// Hand the weighted tensor to cb under the label "ffn_moe_weighted".
// NOTE(review): il is presumably the layer index - verify against the enclosing function.
cb(cur, "ffn_moe_weighted", il);
}