From: Sigbjørn Skjæret
Date: Wed, 10 Sep 2025 17:08:59 +0000 (+0200)
Subject: graph : support non-contiguous Q in build_attn_mha (#15908)
X-Git-Tag: upstream/0.0.6527~84
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=6ab397e12ba8e9f776341cdae68f7ffb2f8d2cde;p=pkg%2Fggml%2Fsources%2Fllama.cpp

graph : support non-contiguous Q in build_attn_mha (#15908)

* support non-contiguous Q in build_attn_mha

* Update src/llama-graph.cpp

ggml-ci

Co-authored-by: Georgi Gerganov

---------

Co-authored-by: Georgi Gerganov
---

diff --git a/src/llama-graph.cpp b/src/llama-graph.cpp
index 7f254b25..ddc772b1 100644
--- a/src/llama-graph.cpp
+++ b/src/llama-graph.cpp
@@ -1273,7 +1273,7 @@ ggml_tensor * llm_graph_context::build_attn_mha(
     // split the batch into streams if needed
     const auto n_stream = k->ne[3];
 
-    q = ggml_reshape_4d(ctx0, q, q->ne[0], q->ne[1], q->ne[2]/n_stream, n_stream);
+    q = ggml_view_4d(ctx0, q, q->ne[0], q->ne[1], q->ne[2]/n_stream, n_stream, q->nb[1], q->nb[2], q->nb[3]/n_stream, 0);
 
     q = ggml_permute(ctx0, q, 0, 2, 1, 3);
     k = ggml_permute(ctx0, k, 0, 2, 1, 3);