}
if (model.layers[il].attn_q_norm) {
+ Qcur = ggml_reshape_2d(ctx0, Qcur, n_embd_head*n_head, n_tokens);
+
Qcur = build_norm(Qcur,
model.layers[il].attn_q_norm,
model.layers[il].attn_q_norm_b,
}
if (model.layers[il].attn_k_norm) {
+ Kcur = ggml_reshape_2d(ctx0, Kcur, n_embd_head*n_head_kv, n_tokens);
+
Kcur = build_norm(Kcur,
model.layers[il].attn_k_norm,
model.layers[il].attn_k_norm_b,
// Q/K Layernorm
if (model.layers[il].attn_q_norm) {
+ Qcur = ggml_reshape_2d(ctx0, Qcur, n_embd_head*n_head, n_tokens);
+ Kcur = ggml_reshape_2d(ctx0, Kcur, n_embd_head*n_head_kv, n_tokens);
+
Qcur = build_norm(Qcur,
model.layers[il].attn_q_norm,
model.layers[il].attn_q_norm_b,