From: hipudding
Date: Thu, 11 Sep 2025 07:59:37 +0000 (+0800)
Subject: CANN: Disable acl_graph for prefill stage (llama/15933)
X-Git-Tag: v0.9.1~45
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=db0cd47d36276a1f0dcae5927039a0c5b3420ca6;p=pkg%2Fggml%2Fsources%2Fggml

CANN: Disable acl_graph for prefill stage (llama/15933)

Since the prefill length is not fixed, graphs constructed for the
prefill stage cannot be reused. For this reason, ACL graph execution is
disabled by default during prefill. It can be re-enabled for prefill
through the GGML_CANN_PREFILL_USE_GRAPH environment variable.
---

diff --git a/src/ggml-cann/ggml-cann.cpp b/src/ggml-cann/ggml-cann.cpp
index d148174f..19a18a28 100755
--- a/src/ggml-cann/ggml-cann.cpp
+++ b/src/ggml-cann/ggml-cann.cpp
@@ -2360,6 +2360,21 @@ static enum ggml_status ggml_backend_cann_graph_compute(
     bool use_cann_graph = true;
     bool cann_graph_update_required = false;
 
+    static bool prefill_use_graph = parse_bool(get_env("GGML_CANN_PREFILL_USE_GRAPH").value_or(""));
+    if (!prefill_use_graph) {
+        // Do not use acl_graph for prefill.
+        for (int i = 0; i < cgraph->n_nodes; i++) {
+            ggml_tensor * node = cgraph->nodes[i];
+            // TODO: Optimize here. Currently, we can only
+            // get seq_len by FA's input.
+            if (node->op == GGML_OP_FLASH_ATTN_EXT) {
+                // Q -> src[0], shape: [B, S, N, D]
+                use_cann_graph = (node->src[0]->ne[1] == 1);
+                break;
+            }
+        }
+    }
+
     if (!cann_ctx->acl_graph_mode) {
         use_cann_graph = false;
     }