From: Michael Grau Date: Wed, 18 Mar 2026 22:25:12 +0000 (+0100) Subject: model : add control vector support where missing (#20653) X-Git-Tag: upstream/0.0.8611~195 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=6729d4920c7509f0d110f114a9652793b5fe668a;p=pkg%2Fggml%2Fsources%2Fllama.cpp model : add control vector support where missing (#20653) * Add control vector functions to qwen3.5 and qwen-next models * Add missing cvec compatibility to the rest of the models * Adjust comments and formatting * cleanup * whitespace --------- Co-authored-by: Sigbjørn Skjæret --- diff --git a/src/models/bitnet.cpp b/src/models/bitnet.cpp index ccf5bc8e8..9f41b7d82 100644 --- a/src/models/bitnet.cpp +++ b/src/models/bitnet.cpp @@ -121,6 +121,9 @@ llm_build_bitnet::llm_build_bitnet(const llama_model & model, const llm_graph_pa cur = ggml_add(ctx0, cur, ffn_inp); cb(cur, "l_out", il); + cur = build_cvec(cur, il); + cb(cur, "l_out", il); + // input for next layer inpL = cur; } diff --git a/src/models/chatglm.cpp b/src/models/chatglm.cpp index 5887ed22e..cd11581a5 100644 --- a/src/models/chatglm.cpp +++ b/src/models/chatglm.cpp @@ -111,8 +111,13 @@ llm_build_chatglm::llm_build_chatglm(const llama_model & model, const llm_graph_ } - inpL = ggml_add(ctx0, cur, ffn_inp); - cb(inpL, "l_out", il); + cur = ggml_add(ctx0, cur, ffn_inp); + + cur = build_cvec(cur, il); + cb(cur, "l_out", il); + + // input for next layer + inpL = cur; } cur = build_norm(inpL, diff --git a/src/models/cogvlm.cpp b/src/models/cogvlm.cpp index 2ef2b6e38..fa7a54ba1 100644 --- a/src/models/cogvlm.cpp +++ b/src/models/cogvlm.cpp @@ -86,6 +86,10 @@ llm_build_cogvlm::llm_build_cogvlm(const llama_model & model, const llm_graph_pa cur = ggml_add(ctx0, cur, ffn_inp); cb(cur, "ffn_out", il); + cur = build_cvec(cur, il); + cb(cur, "l_out", il); + + // input for next layer inpL = cur; } diff --git a/src/models/eurobert.cpp b/src/models/eurobert.cpp index e8628d165..4ca9af873 100644 --- a/src/models/eurobert.cpp +++ b/src/models/eurobert.cpp @@ -82,6 +82,7 @@ llm_build_eurobert::llm_build_eurobert(const llama_model & model, const llm_grap cur = ggml_add(ctx0, cur, ffn_inp); + // input for next layer inpL = cur; } cur = inpL; diff --git a/src/models/jais.cpp b/src/models/jais.cpp index 135bf288b..b28243901 100644 --- a/src/models/jais.cpp +++ b/src/models/jais.cpp @@ -66,8 +66,14 @@ llm_build_jais::llm_build_jais(const llama_model & model, const llm_graph_params LLM_FFN_SILU, LLM_FFN_PAR, il); cb(cur, "ffn_out", il); } - inpL = ggml_add(ctx0, cur, ffn_inp); - cb(inpL, "l_out", il); + + cur = ggml_add(ctx0, cur, ffn_inp); + + cur = build_cvec(cur, il); + cb(cur, "l_out", il); + + // input for next layer + inpL = cur; } cur = build_norm(inpL, model.output_norm, diff --git a/src/models/kimi-linear.cpp b/src/models/kimi-linear.cpp index 4d62f4e71..f189b7107 100644 --- a/src/models/kimi-linear.cpp +++ b/src/models/kimi-linear.cpp @@ -362,6 +362,7 @@ llm_build_kimi_linear::llm_build_kimi_linear(const llama_model & model, const ll cur = build_cvec(cur, il); cb(cur, "l_out", il); + // input for next layer inpL = cur; } cur = inpL; diff --git a/src/models/lfm2.cpp b/src/models/lfm2.cpp index dfa322166..925c3dc9b 100644 --- a/src/models/lfm2.cpp +++ b/src/models/lfm2.cpp @@ -177,6 +177,9 @@ llm_build_lfm2::llm_build_lfm2(const llama_model & model, const llm_graph_ cb(ffn_norm_out, "model.layers.{}.ffn_out", il); cur = ggml_add(ctx0, cur, ffn_out); + + cur = build_cvec(cur, il); + cb(cur, "l_out", il); } cur = build_norm(cur, model.output_norm, NULL, LLM_NORM_RMS, -1); diff --git a/src/models/plamo2.cpp b/src/models/plamo2.cpp index f02acbc18..0bde0b3d8 100644 --- a/src/models/plamo2.cpp +++ b/src/models/plamo2.cpp @@ -71,6 +71,7 @@ llm_build_plamo2::llm_build_plamo2(const llama_model & model, const llm_graph_pa cur = ggml_add(ctx0, cur, residual); cb(cur, "ffn_residual", il); + // input for next layer inpL = cur; } diff --git a/src/models/plamo3.cpp b/src/models/plamo3.cpp index 32af6e046..7cb9da6e7 100644 --- a/src/models/plamo3.cpp +++ b/src/models/plamo3.cpp @@ -109,6 +109,8 @@ llm_build_plamo3::llm_build_plamo3(const llama_model & model, const llm_gr cur = build_cvec(cur, il); cb(cur, "l_out", il); + + // input for next layer inpL = cur; } diff --git a/src/models/qwen35.cpp b/src/models/qwen35.cpp index d07579ee8..e0e48d2a4 100644 --- a/src/models/qwen35.cpp +++ b/src/models/qwen35.cpp @@ -64,6 +64,9 @@ llm_build_qwen35::llm_build_qwen35(const llama_model & model, const llm_graph_pa cur = ggml_add(ctx0, cur, ffn_residual); cb(cur, "post_ffn", il); + cur = build_cvec(cur, il); + cb(cur, "l_out", il); + // Input for next layer inpL = cur; } diff --git a/src/models/qwen35moe.cpp b/src/models/qwen35moe.cpp index b38660c0b..15baea80b 100644 --- a/src/models/qwen35moe.cpp +++ b/src/models/qwen35moe.cpp @@ -64,6 +64,9 @@ llm_build_qwen35moe::llm_build_qwen35moe(const llama_model & model, const llm_gr cur = ggml_add(ctx0, cur, ffn_residual); cb(cur, "post_moe", il); + cur = build_cvec(cur, il); + cb(cur, "l_out", il); + // Input for next layer inpL = cur; } diff --git a/src/models/qwen3next.cpp b/src/models/qwen3next.cpp index cc479dd07..dbfc0874d 100644 --- a/src/models/qwen3next.cpp +++ b/src/models/qwen3next.cpp @@ -56,6 +56,9 @@ llm_build_qwen3next::llm_build_qwen3next(const llama_model & model, const llm_gr cur = ggml_add(ctx0, cur, ffn_residual); cb(cur, "post_moe", il); + cur = build_cvec(cur, il); + cb(cur, "l_out", il); + // Input for next layer inpL = cur; } diff --git a/src/models/smallthinker.cpp b/src/models/smallthinker.cpp index e2155aace..0f7ef462b 100644 --- a/src/models/smallthinker.cpp +++ b/src/models/smallthinker.cpp @@ -101,6 +101,7 @@ llm_build_smallthinker::llm_build_smallthinker(const llama_model & model, cur = ffn_out; cur = ggml_add(ctx0, cur, ffn_inp); + cur = build_cvec(cur, il); cb(cur, "l_out", il); diff --git a/src/models/step35-iswa.cpp b/src/models/step35-iswa.cpp index 176209cd9..c80cb26c5 100644 --- a/src/models/step35-iswa.cpp +++ b/src/models/step35-iswa.cpp @@ -145,9 +145,11 @@ llm_build_step35_iswa::llm_build_step35_iswa(const llama_model & model, const ll cb(cur, "ffn_out", il); } cur = ggml_add(ctx0, cur, ffn_inp); + cur = build_cvec(cur, il); cb(cur, "l_out", il); + // input for next layer inpL = cur; }