From: slaren
Date: Mon, 22 Jan 2024 22:42:41 +0000 (+0100)
Subject: llama : fix not enough space in buffer with Qwen (#5086)
X-Git-Tag: upstream/0.0.4488~2533
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=011e8ec577fd135cbc02993d3ea9840c516d6a1c;p=pkg%2Fggml%2Fsources%2Fllama.cpp

llama : fix not enough space in buffer with Qwen (#5086)
---

diff --git a/llama.cpp b/llama.cpp
index 8c906a22..f6f1ec0f 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -4440,9 +4440,9 @@ static struct ggml_tensor * llm_build_kv(
 
     // these nodes are added to the graph together so that they are not reordered
     // by doing so, the number of splits in the graph is reduced
+    ggml_build_forward_expand(graph, q_cur);
     ggml_build_forward_expand(graph, k_cur);
     ggml_build_forward_expand(graph, v_cur);
-    ggml_build_forward_expand(graph, q_cur);
 
     llm_build_kv_store(ctx, hparams, kv, graph, k_cur, v_cur, n_ctx, n_tokens, kv_head, cb, il);
 