ggml : add epsilon as a parameter for group_norm (llama/8818)

author Molly Sophia <redacted>
Tue, 6 Aug 2024 07:26:46 +0000 (15:26 +0800)
committer Georgi Gerganov <redacted>
Thu, 8 Aug 2024 19:48:46 +0000 (22:48 +0300)
diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h

index a9e88e592d51c8b89a6be84855c9de751566ff9f..15602a96df7ad3ef4df675d2a786a51d888c6cdb 100644 (file)
--- a/ggml/include/ggml.h
+++ b/ggml/include/ggml.h
@@ -1140,16 +1140,17 @@ extern "C" {
  
      // group normalize along ne0*ne1*n_groups
      // used in stable-diffusion
-    // TODO: eps is hardcoded to 1e-6 for now
      GGML_API struct ggml_tensor * ggml_group_norm(
              struct ggml_context * ctx,
              struct ggml_tensor  * a,
-            int                   n_groups);
+            int                   n_groups,
+            float                 eps);
  
      GGML_API struct ggml_tensor * ggml_group_norm_inplace(
              struct ggml_context * ctx,
              struct ggml_tensor  * a,
-            int                   n_groups);
+            int                   n_groups,
+            float                 eps);
  
      // a - x
      // b - dy
diff --git a/ggml/src/ggml-cuda/norm.cu b/ggml/src/ggml-cuda/norm.cu

index 30866d51274fb2c6a7f0db4301d216dff2320464..133e219f0aeda890bd2de2026c424e882e6643e8 100644 (file)
--- a/ggml/src/ggml-cuda/norm.cu
+++ b/ggml/src/ggml-cuda/norm.cu
@@ -142,8 +142,7 @@ static void norm_f32_cuda(const float * x, float * dst, const int ncols, const i
      }
  }
  
-static void group_norm_f32_cuda(const float * x, float * dst, const int num_groups, const int group_size, const int ne_elements, cudaStream_t stream) {
-    static const float eps = 1e-6f;
+static void group_norm_f32_cuda(const float * x, float * dst, const int num_groups, const float eps, const int group_size, const int ne_elements, cudaStream_t stream) {
      if (group_size < 1024) {
          const dim3 block_dims(WARP_SIZE, 1, 1);
          group_norm_f32<WARP_SIZE><<<num_groups, block_dims, 0, stream>>>(x, dst, group_size, ne_elements, eps);
@@ -196,8 +195,12 @@ void ggml_cuda_op_group_norm(ggml_backend_cuda_context & ctx, ggml_tensor * dst)
      GGML_ASSERT( dst->type == GGML_TYPE_F32);
  
      int num_groups = dst->op_params[0];
+
+    float eps;
+    memcpy(&eps, dst->op_params + 1, sizeof(float));
+
      int group_size = src0->ne[0] * src0->ne[1] * ((src0->ne[2] + num_groups - 1) / num_groups);
-    group_norm_f32_cuda(src0_d, dst_d, num_groups * src0->ne[3], group_size, ggml_nelements(src0), stream);
+    group_norm_f32_cuda(src0_d, dst_d, num_groups * src0->ne[3], eps, group_size, ggml_nelements(src0), stream);
  }
  
  void ggml_cuda_op_rms_norm(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
diff --git a/ggml/src/ggml-metal.m b/ggml/src/ggml-metal.m

index 3396f291209c4701a6f15e342932c3e36dbf6889..9fc08ab3aa5cc718abd977127e7663a32f719202 100644 (file)
--- a/ggml/src/ggml-metal.m
+++ b/ggml/src/ggml-metal.m
@@ -2236,10 +2236,8 @@ static enum ggml_status ggml_metal_graph_compute(
                          GGML_ASSERT(ne00 % 4 == 0);
                          GGML_ASSERT(ggml_is_contiguous(src0));
  
-                        //float eps;
-                        //memcpy(&eps, dst->op_params, sizeof(float));
-
-                        const float eps = 1e-6f; // TODO: temporarily hardcoded
+                        float eps;
+                        memcpy(&eps, dst->op_params + 1, sizeof(float));
  
                          const int32_t n_groups = ((int32_t *) dst->op_params)[0];
  
diff --git a/ggml/src/ggml-sycl/norm.cpp b/ggml/src/ggml-sycl/norm.cpp

index cccf87d069a31a57edf005a0c38a02a73a907ffa..b3159b9d1b94d63db4ea421171da0fce372d8f50 100644 (file)
--- a/ggml/src/ggml-sycl/norm.cpp
+++ b/ggml/src/ggml-sycl/norm.cpp
@@ -225,9 +225,8 @@ static void norm_f32_sycl(const float* x, float* dst, const int ncols,
  }
  
  static void group_norm_f32_sycl(const float* x, float* dst,
-    const int num_groups, const int group_size,
+    const int num_groups, const float eps, const int group_size,
      const int ne_elements, queue_ptr stream, int device) {
-    static const float eps = 1e-6f;
      if (group_size < 1024) {
          const sycl::range<3> block_dims(1, 1, WARP_SIZE);
          stream->submit([&](sycl::handler& cgh) {
@@ -343,8 +342,12 @@ void ggml_sycl_op_group_norm(ggml_backend_sycl_context& ctx, const ggml_tensor*
      GGML_ASSERT(dst->type == GGML_TYPE_F32);
  
      int num_groups = dst->op_params[0];
+
+    float eps;
+    memcpy(&eps, dst->op_params + 1, sizeof(float));
+
      int group_size = src0->ne[0] * src0->ne[1] * ((src0->ne[2] + num_groups - 1) / num_groups);
-    group_norm_f32_sycl(src0_dd, dst_dd, num_groups, group_size, src0->ne[0] * src0->ne[1] * src0->ne[2], main_stream, ctx.device);
+    group_norm_f32_sycl(src0_dd, dst_dd, num_groups, eps, group_size, src0->ne[0] * src0->ne[1] * src0->ne[2], main_stream, ctx.device);
  
      (void)src1;
      (void)dst;
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c

index 2f96e76f8b923d9ab434ae90c0c56da5105b868f..c937b5e537c54c2e7772776e96ac7e6720e2943d 100644 (file)
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -5377,6 +5377,7 @@ static struct ggml_tensor * ggml_group_norm_impl(
      struct ggml_context * ctx,
      struct ggml_tensor * a,
      int n_groups,
+    float eps,
      bool inplace) {
  
      bool is_node = false;
@@ -5387,7 +5388,8 @@ static struct ggml_tensor * ggml_group_norm_impl(
  
      struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  
-    result->op_params[0] = n_groups;
+    ggml_set_op_params_i32(result, 0, n_groups);
+    ggml_set_op_params_f32(result, 1, eps);
  
      result->op = GGML_OP_GROUP_NORM;
      result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -5399,15 +5401,17 @@ static struct ggml_tensor * ggml_group_norm_impl(
  struct ggml_tensor * ggml_group_norm(
      struct ggml_context * ctx,
      struct ggml_tensor * a,
-    int n_groups) {
-    return ggml_group_norm_impl(ctx, a, n_groups, false);
+    int n_groups,
+    float eps) {
+    return ggml_group_norm_impl(ctx, a, n_groups, eps, false);
  }
  
  struct ggml_tensor * ggml_group_norm_inplace(
      struct ggml_context * ctx,
      struct ggml_tensor * a,
-    int n_groups) {
-    return ggml_group_norm_impl(ctx, a, n_groups, true);
+    int n_groups,
+    float eps) {
+    return ggml_group_norm_impl(ctx, a, n_groups, eps, true);
  }
  
  // ggml_mul_mat
@@ -12098,10 +12102,11 @@ static void ggml_compute_forward_group_norm_f32(
  
      GGML_TENSOR_UNARY_OP_LOCALS
  
-    const float eps = 1e-6f; // TODO: make this a parameter
-
      // TODO: optimize
  
+    float eps;
+    memcpy(&eps, dst->op_params + 1, sizeof(float));
+
      int n_channels = src0->ne[2];
      int n_groups = dst->op_params[0];
      int n_channels_per_group = (n_channels + n_groups - 1) / n_groups;
author	Molly Sophia <redacted>
	Tue, 6 Aug 2024 07:26:46 +0000 (15:26 +0800)
committer	Georgi Gerganov <redacted>
	Thu, 8 Aug 2024 19:48:46 +0000 (22:48 +0300)
ggml/include/ggml.h		patch | blob | history
ggml/src/ggml-cuda/norm.cu		patch | blob | history
ggml/src/ggml-metal.m		patch | blob | history
ggml/src/ggml-sycl/norm.cpp		patch | blob | history
ggml/src/ggml.c		patch | blob | history