ggml : add `ELU`, `TANH`, `ARGMAX` (#316)

author PAB <redacted>

Sun, 2 Jul 2023 15:25:37 +0000 (17:25 +0200)

committer GitHub <redacted>

Sun, 2 Jul 2023 15:25:37 +0000 (18:25 +0300)
author PAB <redacted>
Sun, 2 Jul 2023 15:25:37 +0000 (17:25 +0200)
committer GitHub <redacted>
Sun, 2 Jul 2023 15:25:37 +0000 (18:25 +0300)
diff --git a/include/ggml/ggml.h b/include/ggml/ggml.h

index fb3fec29cc6d6aa9f70b6f5ddca1c7d7ec0831e9..05d7f0a191a3f293826b211011dccea35412251b 100644 (file)
--- a/include/ggml/ggml.h
+++ b/include/ggml/ggml.h
@@ -321,17 +321,20 @@ extern "C" {
          GGML_OP_SUM,
          GGML_OP_SUM_ROWS,
          GGML_OP_MEAN,
+        GGML_OP_ARGMAX,
          GGML_OP_REPEAT,
          GGML_OP_REPEAT_BACK,
          GGML_OP_ABS,
          GGML_OP_SGN,
          GGML_OP_NEG,
          GGML_OP_STEP,
+        GGML_OP_TANH, // TANH, ELU follow STEP: same order as GGML_OP_NAME / GGML_OP_SYMBOL, which are indexed by this enum
+        GGML_OP_ELU,
          GGML_OP_RELU,
          GGML_OP_GELU,
          GGML_OP_GELU_QUICK,
          GGML_OP_SILU,
          GGML_OP_SILU_BACK,
          GGML_OP_NORM, // normalize
          GGML_OP_RMS_NORM,
          GGML_OP_RMS_NORM_BACK,
@@ -716,6 +719,11 @@ extern "C" {
              struct ggml_context * ctx,
              struct ggml_tensor  * a);
  
+    // argmax along rows
+    // a must be a matrix; the result is an I32 tensor with a->ne[1] entries,
+    // each one the index of the largest value in the corresponding row of a
+    GGML_API struct ggml_tensor * ggml_argmax(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
      // if a is the same shape as b, and a is not parameter, return a
      // otherwise, return a new tensor: repeat(a) to fit in b
      GGML_API struct ggml_tensor * ggml_repeat(
@@ -760,6 +768,22 @@ extern "C" {
              struct ggml_context * ctx,
              struct ggml_tensor  * a);
  
+    // element-wise tanh(a); the result is a new tensor duplicated from a
+    GGML_API struct ggml_tensor * ggml_tanh(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
+    // same as ggml_tanh, but the result is a view of a (no gradient is attached)
+    GGML_API struct ggml_tensor * ggml_tanh_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
+    // element-wise ELU: x for x > 0, exp(x) - 1 otherwise
+    GGML_API struct ggml_tensor * ggml_elu(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
+    // same as ggml_elu, but the result is a view of a (no gradient is attached)
+    GGML_API struct ggml_tensor * ggml_elu_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
      GGML_API struct ggml_tensor * ggml_relu(
              struct ggml_context * ctx,
              struct ggml_tensor  * a);
diff --git a/src/ggml.c b/src/ggml.c

index 7eaff45cc626ccb692e1ab5db0145c834d11c6e6..a1b2b1b5ff1eace9b3e5af3458dbfaf05b87b119 100644 (file)
--- a/src/ggml.c
+++ b/src/ggml.c
@@ -3465,6 +3465,8 @@ inline static void ggml_vec_log_f32  (const int n, float * y, const float * x) {
  inline static void ggml_vec_abs_f32  (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = fabsf(x[i]); }
  inline static void ggml_vec_sgn_f32  (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? 1.f : ((x[i] < 0.f) ? -1.f : 0.f); }
  inline static void ggml_vec_step_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? 1.f : 0.f; }
+// y[k] = tanh(x[k]) for k in [0, n)
+inline static void ggml_vec_tanh_f32 (const int n, float * y, const float * x) {
+    for (int k = 0; k < n; ++k) {
+        y[k] = tanhf(x[k]);
+    }
+}
+// ELU with alpha = 1: y[i] = x[i] for x[i] > 0, exp(x[i]) - 1 otherwise.
+// expm1f is used instead of expf(x) - 1, which cancels to 0 for small negative x.
+inline static void ggml_vec_elu_f32  (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : expm1f(x[i]); }
  inline static void ggml_vec_relu_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : 0.f; }
  
  static const float GELU_COEF_A    = 0.044715f;
@@ -3616,6 +3618,16 @@ inline static void ggml_vec_norm_inv_f32(const int n, float * s, const float * x
      *s = 1.f/(*s);
  }
  
+// Writes to *s the index of the largest of the n values in x.
+//  - on ties the last maximal index is reported
+//  - NaN entries are ignored: every comparison with NaN is false, so a NaN
+//    can neither become the running maximum nor discard the one found so far
+//  - for n <= 0, or when no entry compares >= -INFINITY, *s is 0
+inline static void ggml_vec_argmax_f32(const int n, int * s, const float * x) {
+    float max = -INFINITY;
+    int idx = 0;
+    for (int i = 0; i < n; ++i) {
+        if (x[i] >= max) {
+            max = x[i];
+            idx = i;
+        }
+    }
+    *s = idx;
+}
+
  //
  // data types
  //
@@ -3725,12 +3737,15 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
      "SUM",
      "SUM_ROWS",
      "MEAN",
+    "ARGMAX",
      "REPEAT",
      "REPEAT_BACK",
      "ABS",
      "SGN",
      "NEG",
      "STEP",
+    "TANH",
+    "ELU",
      "RELU",
      "GELU",
      "GELU_QUICK",
@@ -3783,7 +3798,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
      "CROSS_ENTROPY_LOSS_BACK",
  };
  
-static_assert(GGML_OP_COUNT == 64, "GGML_OP_COUNT != 64");
+static_assert(GGML_OP_COUNT == 67, "GGML_OP_COUNT != 67");
  
  static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
      "none",
@@ -3801,12 +3816,15 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
      "Σx",
      "Σx_k",
      "Σx/n",
+    "argmax(x)",
      "repeat(x)",
      "repeat_back(x)",
      "abs(x)",
      "sgn(x)",
      "-x",
      "step(x)",
+    "tanh(x)",
+    "elu(x)",
      "relu(x)",
      "gelu(x)",
      "gelu_quick(x)",
@@ -3859,7 +3877,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
      "cross_entropy_loss_back(x,y)",
  };
  
-static_assert(GGML_OP_COUNT == 64, "GGML_OP_COUNT != 64");
+static_assert(GGML_OP_COUNT == 67, "GGML_OP_COUNT != 67");
  
  static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN");
  static_assert(sizeof(struct ggml_tensor)%GGML_MEM_ALIGN == 0, "ggml_tensor size must be a multiple of GGML_MEM_ALIGN");
@@ -5458,6 +5476,30 @@ struct ggml_tensor * ggml_mean(
      return result;
  }
  
+// ggml_argmax
+
+struct ggml_tensor * ggml_argmax(
+        struct ggml_context * ctx,
+        struct ggml_tensor * a) {
+    // the input has to satisfy ggml_is_matrix
+    GGML_ASSERT(ggml_is_matrix(a));
+
+    // an input that carries a gradient trips the assert below
+    bool needs_grad = false;
+    if (a->grad != NULL) {
+        GGML_ASSERT(false);
+        needs_grad = true;
+    }
+
+    // I32 result with a->ne[1] entries; the remaining extents are 1
+    int64_t shape[GGML_MAX_DIMS] = { a->ne[1], 1, 1, 1 };
+    struct ggml_tensor * out = ggml_new_tensor(ctx, GGML_TYPE_I32, a->n_dims, shape);
+
+    out->op = GGML_OP_ARGMAX;
+    if (needs_grad) {
+        out->grad = ggml_dup_tensor(ctx, out);
+    } else {
+        out->grad = NULL;
+    }
+    out->src0 = a;
+    out->src1 = NULL;
+
+    return out;
+}
+
  // ggml_repeat
  
  struct ggml_tensor * ggml_repeat(
@@ -5651,6 +5693,74 @@ struct ggml_tensor * ggml_step_inplace(
      return ggml_step_impl(ctx, a, true);
  }
  
+// ggml_tanh
+
+// Builds a GGML_OP_TANH node on top of a.
+// With inplace set the result is a view of a, otherwise a duplicate of a.
+struct ggml_tensor * ggml_tanh_impl(
+        struct ggml_context * ctx,
+        struct ggml_tensor * a,
+        bool inplace) {
+    // a gradient is attached only for the out-of-place variant
+    const bool track_grad = !inplace && (a->grad != NULL);
+
+    struct ggml_tensor * out;
+    if (inplace) {
+        out = ggml_view_tensor(ctx, a);
+    } else {
+        out = ggml_dup_tensor(ctx, a);
+    }
+
+    out->op = GGML_OP_TANH;
+    if (track_grad) {
+        out->grad = ggml_dup_tensor(ctx, out);
+    } else {
+        out->grad = NULL;
+    }
+    out->src0 = a;
+    out->src1 = NULL;
+
+    return out;
+}
+
+// out-of-place entry point: forwards to ggml_tanh_impl with inplace = false
+struct ggml_tensor * ggml_tanh(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a) {
+    const bool inplace = false;
+    return ggml_tanh_impl(ctx, a, inplace);
+}
+
+// in-place entry point: forwards to ggml_tanh_impl with inplace = true
+struct ggml_tensor * ggml_tanh_inplace(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a) {
+    const bool inplace = true;
+    return ggml_tanh_impl(ctx, a, inplace);
+}
+
+// ggml_elu
+
+// Builds a GGML_OP_ELU node on top of a.
+// With inplace set the result is a view of a, otherwise a duplicate of a.
+struct ggml_tensor * ggml_elu_impl(
+        struct ggml_context * ctx,
+        struct ggml_tensor * a,
+        bool inplace) {
+    // a gradient is attached only for the out-of-place variant
+    const bool track_grad = !inplace && (a->grad != NULL);
+
+    struct ggml_tensor * out;
+    if (inplace) {
+        out = ggml_view_tensor(ctx, a);
+    } else {
+        out = ggml_dup_tensor(ctx, a);
+    }
+
+    out->op = GGML_OP_ELU;
+    if (track_grad) {
+        out->grad = ggml_dup_tensor(ctx, out);
+    } else {
+        out->grad = NULL;
+    }
+    out->src0 = a;
+    out->src1 = NULL;
+
+    return out;
+}
+
+// out-of-place entry point: forwards to ggml_elu_impl with inplace = false
+struct ggml_tensor * ggml_elu(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a) {
+    const bool inplace = false;
+    return ggml_elu_impl(ctx, a, inplace);
+}
+
+// in-place entry point: forwards to ggml_elu_impl with inplace = true
+struct ggml_tensor * ggml_elu_inplace(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a) {
+    const bool inplace = true;
+    return ggml_elu_impl(ctx, a, inplace);
+}
+
  // ggml_relu
  
  struct ggml_tensor * ggml_relu_impl(
@@ -9393,6 +9503,52 @@ static void ggml_compute_forward_mean(
      }
  }
  
+// ggml_compute_forward_argmax
+
+// For every row i1 of src0 (ne01 rows of ne00 floats each) stores the index of
+// the row's largest value into element i1 of dst.
+// Expected to run on thread 0 only; INIT and FINALIZE passes do nothing.
+static void ggml_compute_forward_argmax_f32(
+        const struct ggml_compute_params * params,
+        const struct ggml_tensor * src0,
+        struct ggml_tensor * dst) {
+    assert(params->ith == 0);
+
+    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+        return;
+    }
+
+    // src0 rows are densely packed floats; dst elements are densely packed
+    // int32 indices (dst is written as int32_t below, so check against that type)
+    assert(src0->nb[0] == sizeof(float));
+    assert(dst->nb[0]  == sizeof(int32_t));
+
+    const int64_t ne00 = src0->ne[0];
+    const int64_t ne01 = src0->ne[1];
+
+    const size_t nb01 = src0->nb[1];
+    const size_t nb0  = dst->nb[0];
+
+    for (int64_t i1 = 0; i1 < ne01; i1++) {
+        const float * src  = (const float *) ((const char *) src0->data + i1*nb01);
+        int32_t     * dst_ = (int32_t *)     ((char *)        dst->data + i1*nb0);
+
+        // ggml_vec_argmax_f32 reports through an int; convert on store
+        int v = 0;
+        ggml_vec_argmax_f32(ne00, &v, src);
+        dst_[0] = (int32_t) v;
+    }
+}
+
+// Type dispatch for ARGMAX: only F32 sources are handled, anything else asserts.
+static void ggml_compute_forward_argmax(
+        const struct ggml_compute_params * params,
+        const struct ggml_tensor * src0,
+        struct ggml_tensor * dst) {
+    if (src0->type == GGML_TYPE_F32) {
+        ggml_compute_forward_argmax_f32(params, src0, dst);
+    } else {
+        GGML_ASSERT(false);
+    }
+}
+
  // ggml_compute_forward_repeat
  
  static void ggml_compute_forward_repeat_f32(
@@ -9697,6 +9853,90 @@ static void ggml_compute_forward_step(
      }
  }
  
+// ggml_compute_forward_tanh
+
+// Applies tanh to src0 row by row and writes the result to dst.
+// Expected to run on thread 0 only; INIT and FINALIZE passes do nothing.
+static void ggml_compute_forward_tanh_f32(
+        const struct ggml_compute_params * params,
+        const struct ggml_tensor * src0,
+        struct ggml_tensor * dst) {
+    assert(params->ith == 0);
+    assert(ggml_are_same_shape(src0, dst));
+
+    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+        return;
+    }
+
+    const int n_rows = ggml_nrows(src0);
+    const int n_cols = src0->ne[0];
+
+    // both tensors must have densely packed float rows
+    assert(dst->nb[0]  == sizeof(float));
+    assert(src0->nb[0] == sizeof(float));
+
+    for (int r = 0; r < n_rows; r++) {
+        float * dst_row = (float *) ((char *) dst->data  + r*( dst->nb[1]));
+        float * src_row = (float *) ((char *) src0->data + r*(src0->nb[1]));
+
+        ggml_vec_tanh_f32(n_cols, dst_row, src_row);
+    }
+}
+
+// Type dispatch for TANH: only F32 sources are handled, anything else asserts.
+static void ggml_compute_forward_tanh(
+        const struct ggml_compute_params * params,
+        const struct ggml_tensor * src0,
+        struct ggml_tensor * dst) {
+    if (src0->type == GGML_TYPE_F32) {
+        ggml_compute_forward_tanh_f32(params, src0, dst);
+    } else {
+        GGML_ASSERT(false);
+    }
+}
+
+// ggml_compute_forward_elu
+
+// Applies ELU to src0 row by row and writes the result to dst.
+// Expected to run on thread 0 only; INIT and FINALIZE passes do nothing.
+static void ggml_compute_forward_elu_f32(
+        const struct ggml_compute_params * params,
+        const struct ggml_tensor * src0,
+        struct ggml_tensor * dst) {
+    assert(params->ith == 0);
+    assert(ggml_are_same_shape(src0, dst));
+
+    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+        return;
+    }
+
+    const int n_rows = ggml_nrows(src0);
+    const int n_cols = src0->ne[0];
+
+    // both tensors must have densely packed float rows
+    assert(dst->nb[0]  == sizeof(float));
+    assert(src0->nb[0] == sizeof(float));
+
+    for (int r = 0; r < n_rows; r++) {
+        float * dst_row = (float *) ((char *) dst->data  + r*( dst->nb[1]));
+        float * src_row = (float *) ((char *) src0->data + r*(src0->nb[1]));
+
+        ggml_vec_elu_f32(n_cols, dst_row, src_row);
+    }
+}
+
+// Type dispatch for ELU: only F32 sources are handled, anything else asserts.
+static void ggml_compute_forward_elu(
+        const struct ggml_compute_params * params,
+        const struct ggml_tensor * src0,
+        struct ggml_tensor * dst) {
+    if (src0->type == GGML_TYPE_F32) {
+        ggml_compute_forward_elu_f32(params, src0, dst);
+    } else {
+        GGML_ASSERT(false);
+    }
+}
+
  // ggml_compute_forward_relu
  
  static void ggml_compute_forward_relu_f32(
@@ -14670,6 +14910,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
              {
                  ggml_compute_forward_mean(params, tensor->src0, tensor);
              } break;
+        case GGML_OP_ARGMAX:
+            {
+                ggml_compute_forward_argmax(params, tensor->src0, tensor);
+            } break;
          case GGML_OP_REPEAT:
              {
                  ggml_compute_forward_repeat(params, tensor->src0, tensor);
@@ -14694,6 +14938,14 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
              {
                  ggml_compute_forward_step(params, tensor->src0, tensor);
              } break;
+        case GGML_OP_TANH:
+            {
+                ggml_compute_forward_tanh(params, tensor->src0, tensor);
+            } break;
+        case GGML_OP_ELU:
+            {
+                ggml_compute_forward_elu(params, tensor->src0, tensor);
+            } break;
          case GGML_OP_RELU:
              {
                  ggml_compute_forward_relu(params, tensor->src0, tensor);
@@ -15069,6 +15321,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
                  }
              } break;
          case GGML_OP_MEAN:
+        case GGML_OP_ARGMAX:
              {
                  GGML_ASSERT(false); // TODO: implement
              } break;
@@ -15122,6 +15375,14 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
                      // noop
                  }
              } break;
+        case GGML_OP_TANH:
+            {
+                GGML_ASSERT(false); // TODO: not implemented
+            } break;
+        case GGML_OP_ELU:
+            {
+                GGML_ASSERT(false); // TODO: not implemented
+            } break;
          case GGML_OP_RELU:
              {
                  if (src0->grad) {
@@ -15141,14 +15402,6 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
              {
                  GGML_ASSERT(false); // TODO: not implemented
              } break;
-        case GGML_OP_ALIBI:
-            {
-                GGML_ASSERT(false); // TODO: not implemented
-            } break;
-        case GGML_OP_CLAMP:
-            {
-                GGML_ASSERT(false); // TODO: not implemented
-            } break;
          case GGML_OP_SILU:
              {
                  // necessary for llama
@@ -15505,6 +15758,14 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
                      // noop
                  }
              } break;
+        case GGML_OP_ALIBI:
+            {
+                GGML_ASSERT(false); // TODO: not implemented
+            } break;
+        case GGML_OP_CLAMP:
+            {
+                GGML_ASSERT(false); // TODO: not implemented
+            } break;
          case GGML_OP_CONV_1D_S1_PH:
              {
                  GGML_ASSERT(false); // TODO: not implemented
@@ -16170,12 +16431,15 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
                  case GGML_OP_SUM:
                  case GGML_OP_SUM_ROWS:
                  case GGML_OP_MEAN:
+                case GGML_OP_ARGMAX:
                  case GGML_OP_REPEAT:
                  case GGML_OP_REPEAT_BACK:
                  case GGML_OP_ABS:
                  case GGML_OP_SGN:
                  case GGML_OP_NEG:
                  case GGML_OP_STEP:
+                case GGML_OP_TANH:
+                case GGML_OP_ELU:
                  case GGML_OP_RELU:
                      {
                          node->n_tasks = 1;
author	PAB <redacted>
	Sun, 2 Jul 2023 15:25:37 +0000 (17:25 +0200)
committer	GitHub <redacted>
	Sun, 2 Jul 2023 15:25:37 +0000 (18:25 +0300)
include/ggml/ggml.h		patch \| blob \| history
src/ggml.c		patch \| blob \| history