From: Sigbjørn Skjæret
Date: Fri, 3 Oct 2025 12:40:25 +0000 (+0200)
Subject: llama : fix shapes for bert/mpt q/k norm (#16409)
X-Git-Tag: upstream/0.0.6764~81
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=946f71ed9ade07e319859b5ce656144140e066fb;p=pkg%2Fggml%2Fsources%2Fllama.cpp

llama : fix shapes for bert/mpt q/k norm (#16409)
---

diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index 19643d60..cce77a85 100644
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -7843,6 +7843,8 @@ struct llm_build_bert : public llm_graph_context {
             }
 
             if (model.layers[il].attn_q_norm) {
+                Qcur = ggml_reshape_2d(ctx0, Qcur, n_embd_head*n_head, n_tokens);
+
                 Qcur = build_norm(Qcur,
                         model.layers[il].attn_q_norm,
                         model.layers[il].attn_q_norm_b,
@@ -7852,6 +7854,8 @@ struct llm_build_bert : public llm_graph_context {
             }
 
             if (model.layers[il].attn_k_norm) {
+                Kcur = ggml_reshape_2d(ctx0, Kcur, n_embd_head*n_head_kv, n_tokens);
+
                 Kcur = build_norm(Kcur,
                         model.layers[il].attn_k_norm,
                         model.layers[il].attn_k_norm_b,
@@ -8234,6 +8238,9 @@ struct llm_build_mpt : public llm_graph_context {
 
             // Q/K Layernorm
             if (model.layers[il].attn_q_norm) {
+                Qcur = ggml_reshape_2d(ctx0, Qcur, n_embd_head*n_head, n_tokens);
+                Kcur = ggml_reshape_2d(ctx0, Kcur, n_embd_head*n_head_kv, n_tokens);
+
                 Qcur = build_norm(Qcur,
                         model.layers[il].attn_q_norm,
                         model.layers[il].attn_q_norm_b,
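
All three hunks apply the same idea: ggml's norm operations normalize over the first dimension of a tensor, so a 3D [n_embd_head, n_head, n_tokens] view of Qcur/Kcur gets layer-normed per head, while the attn_q_norm/attn_k_norm weights span a full n_embd = n_embd_head*n_head row; the added ggml_reshape_2d calls restore the 2D [n_embd, n_tokens] view before build_norm. What follows is a minimal plain-C++ sketch of that difference, not llama.cpp code: sizes are hypothetical, the norm has no affine weights, and layer_norm_rows is an illustrative helper.

#include <cmath>
#include <cstdio>
#include <vector>

// LayerNorm (no scale/bias) applied independently to each contiguous
// row of `width` elements.
static void layer_norm_rows(std::vector<float> & x, int width) {
    const float eps = 1e-5f;
    for (size_t row = 0; row < x.size() / width; ++row) {
        float * v = x.data() + row*width;
        float mean = 0.0f;
        for (int i = 0; i < width; ++i) mean += v[i];
        mean /= width;
        float var = 0.0f;
        for (int i = 0; i < width; ++i) var += (v[i] - mean)*(v[i] - mean);
        var /= width;
        const float inv = 1.0f/std::sqrt(var + eps);
        for (int i = 0; i < width; ++i) v[i] = (v[i] - mean)*inv;
    }
}

int main() {
    const int n_embd_head = 4, n_head = 2; // hypothetical sizes, one token
    const int n_embd = n_embd_head*n_head;

    std::vector<float> q = {1, 2, 3, 4, 10, 20, 30, 40};

    // 3D view [n_embd_head, n_head, n_tokens]: dim 0 is n_embd_head,
    // so each head is normalized separately (the pre-fix behavior).
    std::vector<float> per_head = q;
    layer_norm_rows(per_head, n_embd_head);

    // 2D view [n_embd, n_tokens] (the fix): one norm over the whole row,
    // matching a norm weight of n_embd elements.
    std::vector<float> full = q;
    layer_norm_rows(full, n_embd);

    for (int i = 0; i < n_embd; ++i) {
        std::printf("%d: per-head %+.3f  full %+.3f\n", i, per_head[i], full[i]);
    }
    return 0;
}

The two outputs differ for every element: per-head normalization uses each head's own mean and variance, while the full-row pass uses statistics over all n_embd values, which is presumably the extent the q/k norm weights were trained against and why the 2D reshape is needed before build_norm.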