From: ymcki
Date: Fri, 13 Feb 2026 08:10:18 +0000 (+0800)
Subject: model : Kimi Linear fix conv state update (#19531)
X-Git-Tag: upstream/0.0.8067~39
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=33a56f90a6a793a3c7b1f6ca39ff43a1cecd0b61;p=pkg%2Fggml%2Fsources%2Fllama.cpp

model : Kimi Linear fix conv state update (#19531)

* fix conv state update for llama-server parallel serving

---------

Co-authored-by: Piotr Wilkin (ilintar)
---

diff --git a/src/models/kimi-linear.cpp b/src/models/kimi-linear.cpp
index 0f037d1a3..942844d07 100644
--- a/src/models/kimi-linear.cpp
+++ b/src/models/kimi-linear.cpp
@@ -41,8 +41,11 @@ static ggml_tensor * causal_conv1d(ggml_cgraph * gf, ggml_context * ctx0, ggml_t
         conv_x->nb[1], conv_x->nb[2], n_seq_tokens * conv_x->nb[0]);
     ggml_build_forward_expand(gf,
         ggml_cpy(ctx0, last_conv_x,
-            ggml_view_1d(ctx0, conv_states_all, conv_state_size * n_seqs,
-                (kv_head * n_embd_r_total + qkv * conv_state_size) * ggml_element_size(conv_states_all))));
+            ggml_view_3d(ctx0, conv_states_all,
+                d_conv - 1, d_inner, n_seqs,
+                (d_conv - 1) * ggml_element_size(conv_states_all), // nb1: contiguous within one channel's conv taps
+                n_embd_r_total * ggml_element_size(conv_states_all), // nb2: stride between sequences (skip over K,V states)
+                (kv_head * n_embd_r_total + qkv * conv_state_size) * ggml_element_size(conv_states_all)))); // offset to first seq's Q/K/V state
     // Reshape conv weight: GGUF [d_conv, 1, d_inner, 1] -> ggml_ssm_conv expects [d_conv, d_inner]
     // GGUF stores as [d_conv, 1, d_inner, 1] with memory layout w[conv_step + channel * d_conv]
     // vLLM stores as [d_inner, d_conv] with memory layout w[channel * d_conv + conv_step]