git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
graph : support non-contiguous Q in build_attn_mha (#15908)
author: Sigbjørn Skjæret <redacted>
Wed, 10 Sep 2025 17:08:59 +0000 (19:08 +0200)
committer: GitHub <redacted>
Wed, 10 Sep 2025 17:08:59 +0000 (19:08 +0200)
* support non-contiguous Q in build_attn_mha

* Update src/llama-graph.cpp

ggml-ci

Co-authored-by: Georgi Gerganov <redacted>
---------

Co-authored-by: Georgi Gerganov <redacted>
src/llama-graph.cpp

index 7f254b25cd451dfa5c1d7ba09595a0d9ed2ae19c..ddc772b179f7e713968533b0d863de8fb4b543d1 100644 (file)
@@ -1273,7 +1273,7 @@ ggml_tensor * llm_graph_context::build_attn_mha(
     // split the batch into streams if needed
     const auto n_stream = k->ne[3];
 
-    q = ggml_reshape_4d(ctx0, q, q->ne[0], q->ne[1], q->ne[2]/n_stream, n_stream);
+    q = ggml_view_4d(ctx0, q, q->ne[0], q->ne[1], q->ne[2]/n_stream, n_stream, q->nb[1], q->nb[2], q->nb[3]/n_stream, 0);
 
     q = ggml_permute(ctx0, q, 0, 2, 1, 3);
     k = ggml_permute(ctx0, k, 0, 2, 1, 3);