cur = build_attn(inp_attn,
model.layers[il].wo, model.layers[il].bo,
- Qcur, Kcur, Vcur, nullptr, nullptr, kq_scale, il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, kq_scale, il);
cb(cur, "attn_out", il);
}
cur = build_attn(inp_attn,
model.layers[il].wo, model.layers[il].bo,
- Qcur, Kcur, Vcur, nullptr, nullptr, kq_scale, il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, kq_scale, il);
cb(cur, "attn_out", il);
}
cur = build_attn(inp_attn,
model.layers[il].wo, model.layers[il].bo,
- Qcur, Kcur, Vcur, nullptr, nullptr, kq_scale, il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, kq_scale, il);
}
if (il == n_layer - 1 && inp_out_ids) {
cur = build_attn(inp_attn,
model.layers[il].wo, NULL,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
}
if (il == n_layer - 1 && inp_out_ids) {
cur = build_attn(inp_attn,
model.layers[il].wo, NULL,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
}
if (il == n_layer - 1 && inp_out_ids) {
cur = build_attn(inp_attn,
model.layers[il].wo, NULL,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
}
if (il == n_layer - 1 && inp_out_ids) {
cur = build_attn(inp_attn,
model.layers[il].wo, model.layers[il].bo,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f, il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f, il);
}
if (il == n_layer - 1 && inp_out_ids) {
cur = build_attn(inp_attn,
model.layers[il].wo, NULL,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
}
if (il == n_layer - 1 && inp_out_ids) {
cur = build_attn(inp_attn,
model.layers[il].wo, model.layers[il].bo,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
}
if (il == n_layer - 1 && inp_out_ids) {
cur = build_attn(inp_attn,
model.layers[il].wo, NULL,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
}
if (il == n_layer - 1 && inp_out_ids) {
cur = build_attn(inp_attn,
model.layers[il].wo, model.layers[il].bo,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
cb(cur, "kqv_out", il);
}
cur = build_attn(inp_attn,
model.layers[il].wo, nullptr,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
cb(cur, "kqv_out", il);
}
cur = build_attn(inp_attn,
model.layers[il].wo, model.layers[il].bo,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
}
if (il == n_layer - 1 && inp_out_ids) {
cur = build_attn(inp_attn,
model.layers[il].wo, model.layers[il].bo,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
}
if (il == n_layer - 1 && inp_out_ids) {
cur = build_attn(inp_attn,
model.layers[il].wo, NULL,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
}
if (il == n_layer - 1 && inp_out_ids) {
cur = build_attn(inp_attn,
model.layers[il].wo, NULL,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
}
if (il == n_layer - 1 && inp_out_ids) {
cur = build_attn(inp_attn,
model.layers[il].wo, model.layers[il].bo,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
}
if (il == n_layer - 1 && inp_out_ids) {
cb(Kcur, "Kcur", il);
cb(Vcur, "Vcur", il);
- cur = build_attn(inp_attn, model.layers[il].wo, model.layers[il].bo, Qcur, Kcur, Vcur, nullptr,
- nullptr, 1.0f / sqrtf(float(n_embd_head)), il);
+ cur = build_attn(inp_attn,
+ model.layers[il].wo, model.layers[il].bo,
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f / sqrtf(float(n_embd_head)), il);
}
if (il == n_layer - 1 && inp_out_ids) {
cb(Kcur, "Kcur", il);
cb(Vcur, "Vcur", il);
- cur = build_attn(inp_attn, model.layers[il].wo, NULL, Qcur, Kcur, Vcur, nullptr, nullptr,
- 1.0f / sqrtf(float(n_embd_head)), il);
+ cur = build_attn(inp_attn,
+ model.layers[il].wo, NULL,
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f / sqrtf(float(n_embd_head)), il);
}
if (il == n_layer - 1 && inp_out_ids) {
cur = build_attn(inp_attn,
model.layers[il].wo, model.layers[il].bo,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
}
if (il == n_layer - 1 && inp_out_ids) {
cur = build_attn(inp_attn,
model.layers[il].wo, model.layers[il].bo,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
}
if (il == n_layer - 1 && inp_out_ids) {
cur = build_attn(inp_attn,
model.layers[il].wo, model.layers[il].bo,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
}
if (il == n_layer - 1 && inp_out_ids) {
cur = build_attn(inp_attn,
model.layers[il].wo, model.layers[il].bo,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
}
if (il == n_layer - 1 && inp_out_ids) {
cur = build_attn(inp_attn,
model.layers[il].wo, model.layers[il].bo,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f, il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f, il);
}
if (il == n_layer - 1 && inp_out_ids) {
cur = build_attn(inp_attn,
model.layers[il].wo, model.layers[il].bo,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f, il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f, il);
}
if (il == n_layer - 1 && inp_out_ids) {
cur = build_attn(inp_attn,
model.layers[il].wo, NULL,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
}
if (il == n_layer - 1 && inp_out_ids) {
cur = build_attn(inp_attn,
model.layers[il].wo, model.layers[il].bo,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
}
if (il == n_layer - 1 && inp_out_ids) {
cur = build_attn(inp_attn,
model.layers[il].wo, model.layers[il].bo,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
}
if (il == n_layer - 1 && inp_out_ids) {
cur = build_attn(inp_attn,
model.layers[il].wo, NULL,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
}
if (il == n_layer - 1 && inp_out_ids) {
cur = build_attn(inp_attn,
model.layers[il].wo, model.layers[il].bo,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
}
if (il == n_layer - 1 && inp_out_ids) {
cur = build_attn(inp_attn,
model.layers[il].wo, NULL,
- q_states, k_states, v_states, nullptr, nullptr, kq_scale, il);
+ q_states, k_states, v_states, nullptr, nullptr, nullptr, kq_scale, il);
}
if (il == n_layer - 1 && inp_out_ids) {
cur = build_attn(inp_attn,
model.layers[il].wo, NULL,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f, il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f, il);
}
if (il == n_layer - 1 && inp_out_ids) {
cur = build_attn(inp_attn,
model.layers[il].wo, NULL,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f, il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f, il);
}
if (il == n_layer - 1 && inp_out_ids) {
cur = build_attn(inp_attn,
model.layers[il].wo, NULL,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f, il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f, il);
}
if (il == n_layer - 1 && inp_out_ids) {
cur = build_attn(inp_attn,
model.layers[il].wo, NULL,
- Qcur, Kcur, Vcur, nullptr, nullptr, hparams.f_attention_scale, il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, hparams.f_attention_scale, il);
} else {
// no KV layers
ggml_tensor * Qcur = build_lora_mm(model.layers[il].wq, cur);
cur = build_attn(inp_attn,
model.layers[il].wo, NULL,
- Qcur, nullptr, nullptr, nullptr, nullptr, hparams.f_attention_scale, il);
+ Qcur, nullptr, nullptr, nullptr, nullptr, nullptr, hparams.f_attention_scale, il);
}
cur = build_norm(cur,
cur = build_attn(inp_attn,
model.layers[il].wo, model.layers[il].bo,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
}
if (il == n_layer - 1 && inp_out_ids) {
cb(Vcur, "Vcur", il);
// No RoPE :)
- cur = build_attn(inp_hybrid->get_attn(), model.layers[il].wo, NULL, Qcur, Kcur, Vcur, NULL, NULL, 1.0f/sqrtf(float(n_embd_head)), il);
+ cur = build_attn(inp_hybrid->get_attn(),
+ model.layers[il].wo, NULL,
+ Qcur, Kcur, Vcur, NULL, NULL, NULL, 1.0f/sqrtf(float(n_embd_head)), il);
}
if (il == n_layer - 1 && inp_out_ids) {
cur = build_attn(inp_attn,
model.layers[il].wo, model.layers[il].bo,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
}
if (il == n_layer - 1 && inp_out_ids) {
cur = build_attn(inp_attn,
model.layers[il].wo, model.layers[il].bo,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
}
if (il == n_layer - 1 && inp_out_ids) {
cur = build_attn(inp_attn,
model.layers[il].wo, nullptr,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
}
if (il == n_layer - 1 && inp_out_ids) {
cur = build_attn(inp_attn,
model.layers[il].wo, NULL,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
}
if (il == n_layer - 1 && inp_out_ids) {
cur = build_attn(inp_attn,
model.layers[il].wo, NULL,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
}
if (il == n_layer - 1 && inp_out_ids) {
cur = build_attn(inp_attn,
model.layers[il].wo, NULL,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
}
if (il == n_layer - 1 && inp_out_ids) {
cur = build_attn(inp_attn,
model.layers[il].wo, model.layers[il].bo,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
}
if (il == n_layer - 1 && inp_out_ids) {
cur = build_attn(inp_attn,
model.layers[il].wo, NULL,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
}
if (il == n_layer - 1 && inp_out_ids) {
cur = build_attn(inp_attn,
model.layers[il].wo, model.layers[il].bo,
- Qcur, Kcur, Vcur, nullptr, nullptr, kq_scale, il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, kq_scale, il);
}
if (il == n_layer - 1 && inp_out_ids) {
// note: MLA with the absorption optimzation converts into MQA (ie: GQA with 1 group)
cur = build_attn(inp_attn,
model.layers[il].wo, NULL,
- Qcur, Kcur, Vcur, nullptr, model.layers[il].wv_b, kq_scale, il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, model.layers[il].wv_b, kq_scale, il);
} else {
ggml_tensor * kv = ggml_mul_mat(ctx0, model.layers[il].wkv_b, kv_cmpr);
cb(kv, "kv", il);
// note: MLA without the absorption optimization converts into MHA (ie: GQA with full n_head groups)
cur = build_attn(inp_attn,
model.layers[il].wo, NULL,
- Qcur, Kcur, Vcur, nullptr, nullptr, kq_scale, il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, kq_scale, il);
}
}
cur = build_attn(inp_attn,
NULL, NULL,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
cur = build_norm(cur,
model.layers[il].attn_sub_norm, NULL,
cur = build_attn(inp_attn,
model.layers[il].wo_enc, nullptr,
- Qcur, Kcur, Vcur, kq_b, nullptr, 1.0f, il);
+ Qcur, Kcur, Vcur, kq_b, nullptr, nullptr, 1.0f, il);
cb(cur, "kqv_out", il);
}
cur = build_attn(inp_attn_self,
model.layers[il].wo, model.layers[il].bo,
- Qcur, Kcur, Vcur, kq_b, nullptr, 1.0f, il);
+ Qcur, Kcur, Vcur, kq_b, nullptr, nullptr, 1.0f, il);
cb(cur, "kqv_out", il);
}
cur = build_attn(inp_attn_cross,
model.layers[il].wo_cross, nullptr,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f, il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f, il);
cb(cur, "kqv_out", il);
//ggml_tensor * q = ggml_permute(ctx0, Qcur, 0, 2, 1, 3);
cur = build_attn(inp_attn,
model.layers[il].wo, model.layers[il].bo,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/float(n_embd_head), il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/float(n_embd_head), il);
}
if (il == n_layer - 1 && inp_out_ids) {
cur = build_attn(inp_attn,
model.layers[il].wo, NULL,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
}
if (il == n_layer - 1 && inp_out_ids) {
cur = build_attn(inp_attn,
model.layers[il].wo, NULL,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
}
if (il == n_layer - 1 && inp_out_ids) {
cur = build_attn(inp_attn,
model.layers[il].wo, NULL,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
}
if (il == n_transformer_layers - 1 && inp_out_ids) {
cur = build_attn(inp_attn,
model.layers[il].wo, model.layers[il].bo,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
}
if (il == n_layer - 1 && inp_out_ids) {
cur = build_attn(inp_attn,
model.layers[il].wo, model.layers[il].bo,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
}
if (il == n_layer - 1 && inp_out_ids) {
cur = build_attn(inp_attn,
model.layers[il].wo, NULL,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
cb(cur, "attn_out", il);
}
const float kq_scale = hparams.f_attention_scale == 0.0f ? 1.0f/sqrtf(float(n_embd_head)) : hparams.f_attention_scale;
cur = build_attn(inp_attn,
model.layers[il].wo, model.layers[il].bo,
- Qcur, Kcur, Vcur, nullptr, nullptr, kq_scale, il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, kq_scale, il);
cb(cur, "attn_out", il);
return cur;
}
const float kq_scale = hparams.f_attention_scale == 0.0f ? 1.0f/sqrtf(float(n_embd_head)) : hparams.f_attention_scale;
cur = build_attn(inp_attn,
model.layers[il].wo, model.layers[il].bo,
- Qcur, Kcur, Vcur, nullptr, nullptr, kq_scale, il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, kq_scale, il);
cb(cur, "attn_out", il);
return cur;
}
cur = build_attn(inp_attn,
model.layers[il].wo, nullptr,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
}
if (il == n_layer - 1 && inp_out_ids) {
cur = build_attn(inp_attn,
model.layers[il].wo, NULL,
- q_states, k_states, v_states, nullptr, nullptr, kq_scale, il);
+ q_states, k_states, v_states, nullptr, nullptr, nullptr, kq_scale, il);
}
if (il == n_layer - 1 && inp_out_ids) {
cur = build_attn(inp_attn,
model.layers[il].wo, model.layers[il].bo,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_rot)), il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_rot)), il);
}
if (il == n_layer - 1 && inp_out_ids) {
cur = build_attn(inp_attn,
model.layers[il].wo, model.layers[il].bo,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
}
if (il == n_layer - 1 && inp_out_ids) {
cur = build_attn(inp_attn,
model.layers[il].wo, NULL,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
}
if (il == n_layer - 1) {
cur = build_attn(inp_attn,
model.layers[il].wo, NULL,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
cb(cur, "attn_out", il);
}
ggml_tensor * attn_out = build_attn(inp->get_attn(),
model.layers[il].wo, NULL,
- Qcur, Kcur, Vcur, nullptr, nullptr, kq_scale, il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, kq_scale, il);
cb(attn_out, "attn_out", il);
cur = build_norm(inpL,
ext_factor, attn_factor, beta_fast, beta_slow
);
- cur = build_attn(inp, model.layers[il].wo, NULL, Qcur, Kcur, Vcur, NULL, NULL, 1.0f/sqrtf(float(n_embd_head_v)), il);
+ cur = build_attn(inp,
+ model.layers[il].wo, NULL,
+ Qcur, Kcur, Vcur, NULL, NULL, NULL, 1.0f/sqrtf(float(n_embd_head_v)), il);
}
cb(cur, "attn_out", il);
cur = build_attn(inp_attn,
model.layers[il].wo, model.layers[il].bo,
- Qcur, Kcur, Vcur, nullptr, nullptr, kq_scale, il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, kq_scale, il);
cb(cur, "attn_out", il);
}
cur = build_attn(inp_attn,
model.layers[il].wo, model.layers[il].bo,
- Qcur, Kcur, Vcur, nullptr, nullptr, kq_scale, il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, kq_scale, il);
cb(cur, "attn_out", il);
}
cur = build_attn(inp_attn,
model.layers[il].wo, model.layers[il].bo,
- Qcur, Kcur, Vcur, nullptr, nullptr, kq_scale, il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, kq_scale, il);
cb(cur, "attn_out", il);
}
cur = build_attn(inp_attn,
model.layers[il].wo, model.layers[il].bo,
- Qcur, Kcur, Vcur, nullptr, nullptr, kq_scale, il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, kq_scale, il);
cb(cur, "attn_out", il);
}
cb(Kcur, "Kcur", il);
cb(Vcur, "Vcur", il);
- cur = build_attn_with_sinks(inp_attn,
+ cur = build_attn(inp_attn,
model.layers[il].wo, model.layers[il].bo,
- Qcur, Kcur, Vcur, nullptr, nullptr, model.layers[il].attn_sinks, 1.0f/sqrtf(float(n_rot)), il);
+ Qcur, Kcur, Vcur, nullptr, model.layers[il].attn_sinks, nullptr, 1.0f/sqrtf(float(n_rot)), il);
cb(cur, "attn_out", il);
}
);
cur = build_attn(inp_attn, model.layers[il].wo, NULL,
- q, k, v, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
+ q, k, v, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
cb(cur, "model.layers.{}.self_attn.out_proj", il);
cur = build_attn(inp_attn,
model.layers[il].wo, model.layers[il].bo,
- Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f / sqrtf(float(n_embd_head)), il);
+ Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f / sqrtf(float(n_embd_head)), il);
}
if (il == n_layer - 1 && inp_out_ids) {