    ggml_cann_release_resources(ctx, norm, acl_src, acl_dst);
}
+void ggml_cann_l2_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
+    ggml_tensor * src = dst->src[0];
+
+    aclTensor * acl_src = ggml_cann_create_tensor(src);
+    aclTensor * acl_dst = ggml_cann_create_tensor(dst);
+
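+    // Scratch buffer for the per-row norms: one value for each of the
+    // ne[1] * ne[2] * ne[3] rows of src.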
+    size_t type_size = ggml_type_size(src->type);
+    int64_t n_bytes = src->ne[3] * src->ne[2] * src->ne[1] * type_size;
+    ggml_cann_pool_alloc temp_buffer_allocator(ctx.pool(), n_bytes);
+    void * buffer = temp_buffer_allocator.get();
+
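+    // View the scratch buffer as an FP32 tensor of shape {1, ne[1], ne[2],
+    // ne[3]} with contiguous strides, so it broadcasts against src in the
+    // final division.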
+    int64_t div_ne[] = {1, src->ne[1], src->ne[2], src->ne[3]};
+    size_t div_nb[GGML_MAX_DIMS];
+    div_nb[0] = sizeof(float);
+    for (int i = 1; i < GGML_MAX_DIMS; ++i) {
+        div_nb[i] = div_nb[i - 1] * div_ne[i - 1];
+    }
+    aclTensor * acl_div = ggml_cann_create_tensor(buffer, ACL_FLOAT, type_size, div_ne, div_nb, GGML_MAX_DIMS);
+
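+    // Reduce over ACL dimension 3; ggml_cann_create_tensor reverses the
+    // dimension order, so this corresponds to ggml's innermost dimension
+    // ne[0] (the rows).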
+    std::vector<int64_t> norm_dims = { 3 };
+    aclIntArray * dims_array = aclCreateIntArray(norm_dims.data(), norm_dims.size());
+
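+    // p = 2 selects the L2 norm: aclnnNorm writes the per-row norms into
+    // acl_div (keepdim = true), then aclnnDiv divides src by them.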
+    float p_value = 2.0f;
+    aclScalar * p_scalar = aclCreateScalar(&p_value, aclDataType::ACL_FLOAT);
+    GGML_CANN_CALL_ACLNN_OP(ctx, Norm, acl_src, p_scalar, dims_array, true, acl_div);
+    GGML_CANN_CALL_ACLNN_OP(ctx, Div, acl_src, acl_div, acl_dst);
+    ggml_cann_release_resources(ctx, dims_array, p_scalar, acl_src, acl_dst, acl_div);
+}
+
void ggml_cann_group_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
    ggml_tensor * src = dst->src[0];
#include <aclnnop/aclnn_cos.h>
#include <aclnnop/aclnn_log.h>
#include <aclnnop/aclnn_sign.h>
+#include <aclnnop/aclnn_norm.h>
#include "acl_tensor.h"
#include "common.h"
*/
void ggml_cann_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst);
+/**
+ * @brief Computes the L2 Normalization for a ggml tensor using the CANN
+ * backend.
+ *
+ * @details This function applies the L2 Normalization operation to the
+ * input tensor `src` and stores the result in the destination tensor
+ * `dst`. L2 Normalization scales the input so that the L2 norm along
+ * the normalized dimension equals 1. This operation is commonly used
+ * in neural networks for feature normalization and vector scaling.
+ * The operation is defined as:
+ * \f[
+ * \text{out} = \frac{x}{\sqrt{\sum x^2}}
+ * \f]
+ *
+ * @param ctx The CANN context used for operations.
+ * @param dst The destination tensor where the normalized values will be
+ * stored. `dst->src[0]` is the input tensor.
+ * @attention The normalization is performed along the innermost dimension
+ * of the input tensor (`ne[0]`, i.e. per row).
+ */
+void ggml_cann_l2_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst);
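+// Worked example (illustration only): for a row x = (3, 4), sum(x^2) = 25,
+// so the normalized row is (3/5, 4/5) = (0.6, 0.8). The op is assumed to be
+// reached through the GGML_OP_L2_NORM case of the backend's dispatch
+// (ggml_cann_compute_forward), e.g.:
+//     case GGML_OP_L2_NORM:
+//         ggml_cann_l2_norm(ctx, dst);
+//         break;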
+
/**
* @brief Computes the Group Normalization for a ggml tensor using the CANN
* backend.