int32_t layer_start = -1;
int32_t layer_end = -1;
- ggml_tensor * tensor_for(int il) const {
+ struct ggml_tensor * tensor_for(int il) const { // return the control-vector tensor for layer il, or nullptr if none applies
if (il < 0 || il < layer_start || il > layer_end || (size_t) il >= tensors.size()) { // reject layers outside the configured [layer_start, layer_end] window or past the loaded tensor list
return nullptr;
}
return tensors[il];
}
+ struct ggml_tensor * apply_to(struct ggml_context * ctx, struct ggml_tensor * cur, int il) const { // add this layer's control vector (if any) onto cur; centralizes the tensor_for + ggml_add pattern repeated at every call site below
+ ggml_tensor * layer_dir = tensor_for(il);
+ if (layer_dir != nullptr) {
+ cur = ggml_add(ctx, cur, layer_dir); // element-wise add of the control-vector direction into the layer output
+ }
+ return cur; // returned unchanged when no control vector is loaded for layer il
+ }
+
~llama_control_vector() {
for (struct ggml_context * ctx : ctxs) {
ggml_free(ctx);
cur = ggml_add(ctx0, cur, ffn_inp);
cb(cur, "ffn_out", il);
- ggml_tensor * layer_dir = lctx.cvec.tensor_for(il);
- if (layer_dir != nullptr) {
- cur = ggml_add(ctx0, cur, layer_dir);
- }
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
cb(cur, "l_out", il);
// input for next layer
}
cur = ggml_add(ctx0, cur, ffn_inp);
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
cb(cur, "l_out", il);
// input for next layer
}
cur = ggml_add(ctx0, cur, ffn_inp);
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
cb(cur, "l_out", il);
// input for next layer
}
cur = ggml_add(ctx0, cur, ffn_inp);
- cb(cur, "l_out", il);
-
cur = ggml_add(ctx0, cur, inpL);
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
cb(cur, "l_out", il);
// input for next layer
cur = ggml_add(ctx0, cur, ffn_inp);
cb(cur, "ffn_out", il);
- ggml_tensor * layer_dir = lctx.cvec.tensor_for(il);
- if (layer_dir != nullptr) {
- cur = ggml_add(ctx0, cur, layer_dir);
- }
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
cb(cur, "l_out", il);
// input for next layer
cur = ggml_add(ctx0, cur, ffn_inp);
cb(cur, "ffn_out", il);
- ggml_tensor * layer_dir = lctx.cvec.tensor_for(il);
- if (layer_dir != nullptr) {
- cur = ggml_add(ctx0, cur, layer_dir);
- }
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
cb(cur, "l_out", il);
// input for next layer
cb(cur, "ffn_out", il);
}
- inpL = ggml_add(ctx0, cur, ffn_inp);
- cb(inpL, "l_out", il);
+ cur = ggml_add(ctx0, cur, ffn_inp);
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
+ cb(cur, "l_out", il);
+
+ // input for next layer
+ inpL = cur;
}
cur = llm_build_norm(ctx0, inpL, hparams,
}
cur = ggml_add(ctx0, cur, ffn_inp);
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
cb(cur, "l_out", il);
// input for next layer
cb(cur, "ffn_out", il);
}
- inpL = ggml_add(ctx0, cur, ffn_inp);
- cb(inpL, "l_out", il);
+ cur = ggml_add(ctx0, cur, ffn_inp);
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
+ cb(cur, "l_out", il);
+
+ // input for next layer
+ inpL = cur;
}
cur = llm_build_norm(ctx0, inpL, hparams,
}
cur = ggml_add(ctx0, cur, ffn_inp);
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
cb(cur, "l_out", il);
// input for next layer
}
cur = ggml_add(ctx0, cur, ffn_inp);
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
cb(cur, "l_out", il);
// input for next layer
}
cur = ggml_add(ctx0, cur, ffn_inp);
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
cb(cur, "l_out", il);
// input for next layer
cb(cur, "ffn_out", il);
cur = ggml_add(ctx0, cur, ffn_inp);
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
cb(cur, "l_out", il);
// input for next layer
}
cur = ggml_add(ctx0, cur, ffn_inp);
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
cb(cur, "l_out", il);
// input for next layer
}
cur = ggml_add(ctx0, cur, ffn_output);
- cb(cur, "l_out", il);
-
cur = ggml_add(ctx0, cur, inpL);
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
cb(cur, "l_out", il);
+ // input for next layer
inpL = cur;
}
}
cur = ggml_add(ctx0, residual, cur);
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
cb(cur, "l_out", il);
+ // input for next layer
inpL = cur;
}
}
cur = ggml_add(ctx0, cur, sa_out);
- cb(cur, "l_out", il);
-
cur = ggml_add(ctx0, cur, inpL);
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
cb(cur, "l_out", il);
// input for next layer
cb(cur, "ffn_out", il);
}
- inpL = ggml_add(ctx0, cur, ffn_inp);
- cb(inpL, "l_out", il);
+ cur = ggml_add(ctx0, cur, ffn_inp);
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
+ cb(cur, "l_out", il);
+
+ // input for next layer
+ inpL = cur;
}
cur = llm_build_norm(ctx0, inpL, hparams,
cb(cur, "ffn_out", il);
}
- inpL = ggml_add(ctx0, cur, ffn_inp);
- cb(inpL, "l_out", il);
+ cur = ggml_add(ctx0, cur, ffn_inp);
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
+ cb(cur, "l_out", il);
+
+ // input for next layer
+ inpL = cur;
}
cur = llm_build_norm(ctx0, inpL, hparams,
cb(cur, "ffn_out", il);
cur = ggml_add(ctx0, cur, ffn_inp);
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
cb(cur, "l_out", il);
// input for next layer
cb(cur, "ffn_out", il);
cur = ggml_add(ctx0, cur, ffn_inp);
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
cb(cur, "l_out", il);
// input for next layer
cb(cur, "hidden_scaled_ffn", -1);
cur = ggml_add(ctx0, cur, ffn_inp);
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
cb(cur, "l_out", il);
// input for next layer
}
cur = ggml_add(ctx0, cur, sa_out);
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
cb(cur, "l_out", il);
// input for next layer
NULL,
LLM_FFN_GELU, LLM_FFN_SEQ, cb, il);
cb(cur, "ffn_out", il);
+
cur = ggml_add(ctx0, cur, ffn_inp);
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
cb(cur, "l_out", il);
// input for next layer
// residual
cur = ggml_add(ctx0, cur, inpL);
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
cb(cur, "l_out", il);
// input for next layer
// add together residual + FFN + self-attention
cur = ggml_add(ctx0, cur, inpL);
cur = ggml_add(ctx0, cur, attn_out);
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
cb(cur, "l_out", il);
// input for next layer
cur = ggml_add(ctx0, cur, ffn_inp);
cb(cur, "ffn_out", il);
- ggml_tensor * layer_dir = lctx.cvec.tensor_for(il);
- if (layer_dir != nullptr) {
- cur = ggml_add(ctx0, cur, layer_dir);
- }
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
cb(cur, "l_out", il);
// input for next layer
cur = ggml_add(ctx0, cur, inpL);
cb(cur, "ffn_out", il);
- inpL = ggml_add(ctx0, cur, attn_out);
- cb(inpL, "l_out", il);
+ cur = ggml_add(ctx0, cur, attn_out);
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
+ cb(cur, "l_out", il);
+
+ // input for next layer
+ inpL = cur;
} else {
// attention and ffn are computed sequentially
// x = x + attn(ln1(x))
LLM_FFN_GELU, LLM_FFN_SEQ, cb, il);
cb(cur, "ffn_out", il);
- inpL = ggml_add(ctx0, cur, ffn_inp);
- cb(inpL, "l_out", il);
+ cur = ggml_add(ctx0, cur, ffn_inp);
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
+ cb(cur, "l_out", il);
+
+ // input for next layer
+ inpL = cur;
}
}
cur = ggml_add(ctx0, cur, ffn_out);
cb(cur, "ffn_out", il);
- ggml_tensor * layer_dir = lctx.cvec.tensor_for(il);
- if (layer_dir != nullptr) {
- cur = ggml_add(ctx0, cur, layer_dir);
- }
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
cb(cur, "l_out", il);
// input for next layer
}
cur = ggml_add(ctx0, cur, ffn_inp);
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
cb(cur, "l_out", il);
// input for next layer