From: Georgi Gerganov
Date: Sat, 3 Jan 2026 21:59:06 +0000 (+0200)
Subject: graph : fix graph reuse logic when `n_pos_per_embd > 1` (#18566)
X-Git-Tag: upstream/0.0.7721~99
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=c69c7ebc901acfce2e0a4cacaf3f10085741c5d4;p=pkg%2Fggml%2Fsources%2Fllama.cpp

graph : fix graph reuse logic when `n_pos_per_embd > 1` (#18566)
---

diff --git a/src/llama-graph.cpp b/src/llama-graph.cpp
index 1d0d7197..8edf7d74 100644
--- a/src/llama-graph.cpp
+++ b/src/llama-graph.cpp
@@ -32,7 +32,7 @@ bool llm_graph_input_embd::can_reuse(const llm_graph_params & params) {
     bool res = true;

     res &= (!tokens && !params.ubatch.token) || (tokens && tokens->ne[0] == params.ubatch.n_tokens);
-    res &= (!embd && !params.ubatch.embd) || (embd && embd->ne[0] == params.ubatch.n_tokens);
+    res &= (!embd && !params.ubatch.embd) || (embd && embd->ne[1] == params.ubatch.n_tokens);

     return res;
 }
@@ -62,7 +62,7 @@ void llm_graph_input_pos::set_input(const llama_ubatch * ubatch) {
 bool llm_graph_input_pos::can_reuse(const llm_graph_params & params) {
     bool res = true;

-    res &= pos->ne[0] == params.ubatch.n_tokens;
+    res &= pos->ne[0] == params.ubatch.n_tokens*n_pos_per_embd;

     return res;
 }