git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
model : Kimi Linear fix conv state update (#19531)
author    ymcki <redacted>
Fri, 13 Feb 2026 08:10:18 +0000 (16:10 +0800)
committer GitHub <redacted>
Fri, 13 Feb 2026 08:10:18 +0000 (09:10 +0100)
* fix conv state update for llama-server parallel serving

---------

Co-authored-by: Piotr Wilkin (ilintar) <redacted>
src/models/kimi-linear.cpp

index 0f037d1a39324a449333731a808ca5da55885455..942844d071f32720f3bb4f81111e520c7c0825a6 100644 (file)
@@ -41,8 +41,11 @@ static ggml_tensor * causal_conv1d(ggml_cgraph * gf, ggml_context * ctx0, ggml_t
         conv_x->nb[1], conv_x->nb[2], n_seq_tokens * conv_x->nb[0]);
     ggml_build_forward_expand(gf,
         ggml_cpy(ctx0, last_conv_x,
-            ggml_view_1d(ctx0, conv_states_all, conv_state_size * n_seqs,
-                (kv_head * n_embd_r_total + qkv * conv_state_size) * ggml_element_size(conv_states_all))));
+            ggml_view_3d(ctx0, conv_states_all,
+                d_conv - 1, d_inner, n_seqs,
+                (d_conv - 1) * ggml_element_size(conv_states_all),           // nb1: contiguous within one channel's conv taps
+                n_embd_r_total * ggml_element_size(conv_states_all),         // nb2: stride between sequences (skip over K,V states)
+                (kv_head * n_embd_r_total + qkv * conv_state_size) * ggml_element_size(conv_states_all))));  // offset to first seq's Q/K/V state
     // Reshape conv weight: GGUF [d_conv, 1, d_inner, 1] -> ggml_ssm_conv expects [d_conv, d_inner]
     // GGUF stores as [d_conv, 1, d_inner, 1] with memory layout w[conv_step + channel * d_conv]
     // vLLM stores as [d_inner, d_conv] with memory layout w[channel * d_conv + conv_step]