ggml : sync (custom ops) (#2537)

author Georgi Gerganov <redacted>

Mon, 7 Aug 2023 10:20:09 +0000 (13:20 +0300)

committer GitHub <redacted>

Mon, 7 Aug 2023 10:20:09 +0000 (13:20 +0300)
author Georgi Gerganov <redacted>
Mon, 7 Aug 2023 10:20:09 +0000 (13:20 +0300)
committer GitHub <redacted>
Mon, 7 Aug 2023 10:20:09 +0000 (13:20 +0300)
diff --git a/ggml.c b/ggml.c

index fa0f98aa09df202e63dd89cb8ccf3538f3de38eb..b4a36524021f9b468cfd227b987d1a29b289c3cc 100644 (file)
--- a/ggml.c
+++ b/ggml.c
@@ -195,8 +195,8 @@ typedef void * thread_ret_t;
  #define GGML_ALIGNED_MALLOC(size)  _aligned_malloc(size, GGML_MEM_ALIGN)
  #define GGML_ALIGNED_FREE(ptr)     _aligned_free(ptr)
  #else
-inline static void* ggml_aligned_malloc(size_t size) {
-    void* aligned_memory = NULL;
+inline static void * ggml_aligned_malloc(size_t size) {
+    void * aligned_memory = NULL;
  #ifdef GGML_USE_METAL
      int result = posix_memalign(&aligned_memory, getpagesize(), size);
  #else
@@ -3811,7 +3811,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
      "CROSS_ENTROPY_LOSS_BACK",
  };
  
-static_assert(GGML_OP_COUNT == 59, "GGML_OP_COUNT != 59");
+static_assert(GGML_OP_COUNT == 62, "GGML_OP_COUNT != 62");
  
  static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
      "none",
@@ -3883,7 +3883,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
      "cross_entropy_loss_back(x,y)",
  };
  
-static_assert(GGML_OP_COUNT == 59, "GGML_OP_COUNT != 59");
+static_assert(GGML_OP_COUNT == 62, "GGML_OP_COUNT != 62");
  
  static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
  
@@ -4253,7 +4253,7 @@ static inline bool ggml_is_padded_1d(const struct ggml_tensor * tensor) {
          tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
  }
  
-static inline bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
+bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
      static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  
      return
@@ -6890,7 +6890,7 @@ GGML_API struct ggml_tensor * ggml_conv_1d(
          ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0),
          a->ne[2], 1, 1,
      };
-    struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
+    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
  
      int32_t params[] = { s0, p0, d0 };
      ggml_set_op_params(result, &params, sizeof(params));
@@ -6905,10 +6905,10 @@ GGML_API struct ggml_tensor * ggml_conv_1d(
  
  // ggml_conv_2d
  
-struct ggml_tensor* ggml_conv_2d(
-    struct ggml_context* ctx,
-    struct ggml_tensor * a,
-    struct ggml_tensor * b,
+struct ggml_tensor * ggml_conv_2d(
+    struct ggml_context * ctx,
+    struct ggml_tensor  * a,
+    struct ggml_tensor  * b,
      int                  s0,
      int                  s1,
      int                  p0,
@@ -6929,7 +6929,7 @@ struct ggml_tensor* ggml_conv_2d(
          ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1),
          a->ne[3], b->ne[3],
      };
-    struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
+    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
  
      int32_t params[] = { s0, s1, p0, p1, d0, d1 };
      ggml_set_op_params(result, &params, sizeof(params));
@@ -6945,7 +6945,7 @@ struct ggml_tensor* ggml_conv_2d(
  
  // ggml_conv_1d_ph
  
-struct ggml_tensor* ggml_conv_1d_ph(
+struct ggml_tensor * ggml_conv_1d_ph(
          struct ggml_context * ctx,
          struct ggml_tensor  * a,
          struct ggml_tensor  * b,
@@ -6963,7 +6963,7 @@ static int64_t ggml_calc_pool_output_size(int64_t ins, int ks, int s, int p) {
  
  // ggml_pool_1d
  
-struct ggml_tensor* ggml_pool_1d(
+struct ggml_tensor * ggml_pool_1d(
          struct ggml_context * ctx,
          struct ggml_tensor  * a,
          enum ggml_op_pool     op,
@@ -6982,7 +6982,7 @@ struct ggml_tensor* ggml_pool_1d(
          ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
          a->ne[1],
      };
-    struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
+    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
  
      int32_t params[] = { op, k0, s0, p0 };
      ggml_set_op_params(result, &params, sizeof(params));
@@ -6996,7 +6996,7 @@ struct ggml_tensor* ggml_pool_1d(
  
  // ggml_pool_2d
  
-struct ggml_tensor* ggml_pool_2d(
+struct ggml_tensor * ggml_pool_2d(
          struct ggml_context * ctx,
          struct ggml_tensor  * a,
          enum ggml_op_pool     op,
@@ -7019,7 +7019,7 @@ struct ggml_tensor* ggml_pool_2d(
          ggml_calc_pool_output_size(a->ne[1], k1, s1, p1),
          a->ne[2],
      };
-    struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
+    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
  
      int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
      ggml_set_op_params(result, &params, sizeof(params));
@@ -7349,7 +7349,7 @@ struct ggml_tensor * ggml_map_binary_inplace_f32(
      return ggml_map_binary_impl_f32(ctx, a, b, fun, true);
  }
  
-// ggml_map_custom1
+// ggml_map_custom1_f32
  
  static struct ggml_tensor * ggml_map_custom1_impl_f32(
          struct ggml_context          * ctx,
@@ -7366,7 +7366,7 @@ static struct ggml_tensor * ggml_map_custom1_impl_f32(
  
      ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
  
-    result->op = GGML_OP_MAP_CUSTOM1;
+    result->op = GGML_OP_MAP_CUSTOM1_F32;
      result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
      result->src[0] = a;
  
@@ -7387,7 +7387,7 @@ struct ggml_tensor * ggml_map_custom1_inplace_f32(
      return ggml_map_custom1_impl_f32(ctx, a, fun, true);
  }
  
-// ggml_map_custom2
+// ggml_map_custom2_f32
  
  static struct ggml_tensor * ggml_map_custom2_impl_f32(
          struct ggml_context          * ctx,
@@ -7405,7 +7405,7 @@ static struct ggml_tensor * ggml_map_custom2_impl_f32(
  
      ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
  
-    result->op = GGML_OP_MAP_CUSTOM2;
+    result->op = GGML_OP_MAP_CUSTOM2_F32;
      result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
      result->src[0] = a;
      result->src[1] = b;
@@ -7429,7 +7429,7 @@ struct ggml_tensor * ggml_map_custom2_inplace_f32(
      return ggml_map_custom2_impl_f32(ctx, a, b, fun, true);
  }
  
-// ggml_map_custom3
+// ggml_map_custom3_f32
  
  static struct ggml_tensor * ggml_map_custom3_impl_f32(
          struct ggml_context          * ctx,
@@ -7448,7 +7448,7 @@ static struct ggml_tensor * ggml_map_custom3_impl_f32(
  
      ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
  
-    result->op = GGML_OP_MAP_CUSTOM3;
+    result->op = GGML_OP_MAP_CUSTOM3_F32;
      result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
      result->src[0] = a;
      result->src[1] = b;
@@ -7475,6 +7475,190 @@ struct ggml_tensor * ggml_map_custom3_inplace_f32(
      return ggml_map_custom3_impl_f32(ctx, a, b, c, fun, true);
  }
  
+// ggml_map_custom1
+struct ggml_map_custom1_op_params { // payload stored on a GGML_OP_MAP_CUSTOM1 node via ggml_set_op_params
+    ggml_custom1_op_t fun;      // callback invoked by ggml_compute_forward_map_custom1
+    int n_tasks;                // GGML_N_TASKS_MAX or a positive count (asserted when the node is built)
+    void * userdata;            // handed back to fun as its last argument
+};
+
+static struct ggml_tensor * ggml_map_custom1_impl( // shared builder behind ggml_map_custom1 and ggml_map_custom1_inplace
+        struct ggml_context          * ctx,
+        struct ggml_tensor           * a,
+        const  ggml_custom1_op_t       fun,
+        int                            n_tasks,
+        void                         * userdata,
+        bool                           inplace) {
+    GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0); // only the sentinel or a positive count is accepted
+
+    bool is_node = false;
+
+    if (!inplace && a->grad) { // a grad tensor is attached only to out-of-place results whose input has one
+        is_node = true;
+    }
+
+    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
+
+    struct ggml_map_custom1_op_params params = { // bundle everything the forward pass needs to call fun
+        /*.fun      =*/ fun,
+        /*.n_tasks  =*/ n_tasks,
+        /*.userdata =*/ userdata
+    };
+    ggml_set_op_params(result, (const void *) &params, sizeof(params));
+
+    result->op = GGML_OP_MAP_CUSTOM1;
+    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
+    result->src[0] = a;
+
+    return result;
+}
+
+struct ggml_tensor * ggml_map_custom1(
+        struct ggml_context          * ctx,
+        struct ggml_tensor           * a,
+        const  ggml_custom1_op_t       fun,
+        int                            n_tasks,
+        void                         * userdata) {
+    return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, false); // inplace = false: result comes from ggml_dup_tensor(ctx, a)
+}
+
+struct ggml_tensor * ggml_map_custom1_inplace(
+        struct ggml_context          * ctx,
+        struct ggml_tensor           * a,
+        const  ggml_custom1_op_t       fun,
+        int                            n_tasks,
+        void                         * userdata) {
+    return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, true); // inplace = true: result comes from ggml_view_tensor(ctx, a)
+}
+
+// ggml_map_custom2
+
+struct ggml_map_custom2_op_params { // payload stored on a GGML_OP_MAP_CUSTOM2 node via ggml_set_op_params
+    ggml_custom2_op_t fun;      // callback invoked by ggml_compute_forward_map_custom2
+    int n_tasks;                // GGML_N_TASKS_MAX or a positive count (asserted when the node is built)
+    void * userdata;            // handed back to fun as its last argument
+};
+
+static struct ggml_tensor * ggml_map_custom2_impl( // shared builder behind ggml_map_custom2 and ggml_map_custom2_inplace
+        struct ggml_context          * ctx,
+        struct ggml_tensor           * a,
+        struct ggml_tensor           * b,
+        const  ggml_custom2_op_t       fun,
+        int                            n_tasks,
+        void                         * userdata,
+        bool                           inplace) {
+    GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0); // only the sentinel or a positive count is accepted
+
+    bool is_node = false;
+
+    if (!inplace && (a->grad || b->grad)) { // a grad tensor is attached only to out-of-place results with a grad-carrying input
+        is_node = true;
+    }
+
+    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); // result is always derived from a, never from b
+
+    struct ggml_map_custom2_op_params params = { // bundle everything the forward pass needs to call fun
+        /*.fun      =*/ fun,
+        /*.n_tasks  =*/ n_tasks,
+        /*.userdata =*/ userdata
+    };
+    ggml_set_op_params(result, (const void *) &params, sizeof(params));
+
+    result->op = GGML_OP_MAP_CUSTOM2;
+    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
+    result->src[0] = a;
+    result->src[1] = b;
+
+    return result;
+}
+
+struct ggml_tensor * ggml_map_custom2(
+        struct ggml_context          * ctx,
+        struct ggml_tensor           * a,
+        struct ggml_tensor           * b,
+        const  ggml_custom2_op_t       fun,
+        int                            n_tasks,
+        void                         * userdata) {
+    return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, false); // inplace = false: result comes from ggml_dup_tensor(ctx, a)
+}
+
+struct ggml_tensor * ggml_map_custom2_inplace(
+        struct ggml_context          * ctx,
+        struct ggml_tensor           * a,
+        struct ggml_tensor           * b,
+        const  ggml_custom2_op_t       fun,
+        int                            n_tasks,
+        void                         * userdata) {
+    return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, true); // inplace = true: result comes from ggml_view_tensor(ctx, a)
+}
+
+// ggml_map_custom3
+
+struct ggml_map_custom3_op_params { // payload stored on a GGML_OP_MAP_CUSTOM3 node via ggml_set_op_params
+    ggml_custom3_op_t fun;      // callback invoked by ggml_compute_forward_map_custom3
+    int n_tasks;                // GGML_N_TASKS_MAX or a positive count (asserted when the node is built)
+    void * userdata;            // handed back to fun as its last argument
+};
+
+static struct ggml_tensor * ggml_map_custom3_impl( // shared builder behind ggml_map_custom3 and ggml_map_custom3_inplace
+        struct ggml_context          * ctx,
+        struct ggml_tensor           * a,
+        struct ggml_tensor           * b,
+        struct ggml_tensor           * c,
+        const  ggml_custom3_op_t       fun,
+        int                            n_tasks,
+        void                         * userdata,
+        bool                           inplace) {
+    GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0); // only the sentinel or a positive count is accepted
+
+    bool is_node = false;
+
+    if (!inplace && (a->grad || b->grad || c->grad)) { // a grad tensor is attached only to out-of-place results with a grad-carrying input
+        is_node = true;
+    }
+
+    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); // result is always derived from a, never from b or c
+
+    struct ggml_map_custom3_op_params params = { // bundle everything the forward pass needs to call fun
+        /*.fun      =*/ fun,
+        /*.n_tasks  =*/ n_tasks,
+        /*.userdata =*/ userdata
+    };
+    ggml_set_op_params(result, (const void *) &params, sizeof(params));
+
+    result->op = GGML_OP_MAP_CUSTOM3;
+    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
+    result->src[0] = a;
+    result->src[1] = b;
+    result->src[2] = c;
+
+    return result;
+}
+
+struct ggml_tensor * ggml_map_custom3(
+        struct ggml_context          * ctx,
+        struct ggml_tensor           * a,
+        struct ggml_tensor           * b,
+        struct ggml_tensor           * c,
+        const  ggml_custom3_op_t       fun,
+        int                            n_tasks,
+        void                         * userdata) {
+    return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, false); // inplace = false: result comes from ggml_dup_tensor(ctx, a)
+}
+
+struct ggml_tensor * ggml_map_custom3_inplace(
+        struct ggml_context          * ctx,
+        struct ggml_tensor           * a,
+        struct ggml_tensor           * b,
+        struct ggml_tensor           * c,
+        const  ggml_custom3_op_t       fun,
+        int                            n_tasks,
+        void                         * userdata) {
+    return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, true); // inplace = true: result comes from ggml_view_tensor(ctx, a)
+}
+
+
+
  // ggml_cross_entropy_loss
  
  struct ggml_tensor * ggml_cross_entropy_loss(
@@ -9283,8 +9467,8 @@ static void ggml_compute_forward_sum_rows_f32(
      for (int64_t i3 = 0; i3 < ne03; i3++) {
          for (int64_t i2 = 0; i2 < ne02; i2++) {
              for (int64_t i1 = 0; i1 < ne01; i1++) {
-                float* src_row = (float *) ((char *) src0->data + i1*nb01 + i2*nb02 + i3*nb03);
-                float* dst_row = (float *) ((char *) dst->data  + i1*nb1  + i2*nb2  + i3*nb3);
+                float * src_row = (float *) ((char *) src0->data + i1*nb01 + i2*nb02 + i3*nb03);
+                float * dst_row = (float *) ((char *) dst->data  + i1*nb1  + i2*nb2  + i3*nb3);
                  float row_sum = 0;
                  ggml_vec_sum_f32(ne00, &row_sum, src_row);
                  dst_row[0] = row_sum;
@@ -12894,7 +13078,7 @@ static void ggml_compute_forward_pool_1d(
          const struct ggml_tensor * src0,
                struct ggml_tensor * dst) {
  
-    const int32_t* opts = (const int32_t*)dst->op_params;
+    const int32_t * opts = (const int32_t *)dst->op_params;
      enum ggml_op_pool op = opts[0];
      const int k0 = opts[1];
      const int s0 = opts[2];
@@ -14227,24 +14411,6 @@ static void ggml_compute_forward_map_custom1_f32(
      fun(dst, a);
  }
  
-
-static void ggml_compute_forward_map_custom1(
-        const struct ggml_compute_params * params,
-        const struct ggml_tensor * a,
-        struct ggml_tensor * dst,
-        const ggml_custom1_op_f32_t fun) {
-    switch (a->type) {
-        case GGML_TYPE_F32:
-            {
-                ggml_compute_forward_map_custom1_f32(params, a, dst, fun);
-            } break;
-        default:
-            {
-                GGML_ASSERT(false);
-            } break;
-    }
-}
-
  // ggml_compute_forward_map_custom2
  
  static void ggml_compute_forward_map_custom2_f32(
@@ -14263,24 +14429,6 @@ static void ggml_compute_forward_map_custom2_f32(
  }
  
  
-static void ggml_compute_forward_map_custom2(
-        const struct ggml_compute_params * params,
-        const struct ggml_tensor * a,
-        const struct ggml_tensor * b,
-        struct ggml_tensor * dst,
-        const ggml_custom2_op_f32_t fun) {
-    switch (a->type) {
-        case GGML_TYPE_F32:
-            {
-                ggml_compute_forward_map_custom2_f32(params, a, b, dst, fun);
-            } break;
-        default:
-            {
-                GGML_ASSERT(false);
-            } break;
-    }
-}
-
  // ggml_compute_forward_map_custom3
  
  static void ggml_compute_forward_map_custom3_f32(
@@ -14299,24 +14447,52 @@ static void ggml_compute_forward_map_custom3_f32(
      fun(dst, a, b, c);
  }
  
+// ggml_compute_forward_map_custom1
+
+static void ggml_compute_forward_map_custom1( // forward pass of GGML_OP_MAP_CUSTOM1: calls the callback stored in dst->op_params
+        const struct ggml_compute_params * params,
+        const struct ggml_tensor * a,
+              struct ggml_tensor * dst) {
+    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+        return; // the callback is not run for the INIT and FINALIZE task types
+    }
+
+    struct ggml_map_custom1_op_params p; // copied out with memcpy, as the *_F32 dispatch cases do, instead of aliasing op_params through a struct pointer
+    memcpy(&p, dst->op_params, sizeof(p));
+    p.fun(dst, a, params->ith, params->nth, p.userdata);
+}
+
+// ggml_compute_forward_map_custom2
+
+static void ggml_compute_forward_map_custom2( // forward pass of GGML_OP_MAP_CUSTOM2: calls the callback stored in dst->op_params
+        const struct ggml_compute_params * params,
+        const struct ggml_tensor * a,
+        const struct ggml_tensor * b,
+              struct ggml_tensor * dst) {
+    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+        return; // the callback is not run for the INIT and FINALIZE task types
+    }
+
+    struct ggml_map_custom2_op_params p; // copied out with memcpy, as the *_F32 dispatch cases do, instead of aliasing op_params through a struct pointer
+    memcpy(&p, dst->op_params, sizeof(p));
+    p.fun(dst, a, b, params->ith, params->nth, p.userdata);
+}
+
+// ggml_compute_forward_map_custom3
  
  static void ggml_compute_forward_map_custom3(
          const struct ggml_compute_params * params,
          const struct ggml_tensor * a,
          const struct ggml_tensor * b,
          const struct ggml_tensor * c,
-        struct ggml_tensor * dst,
-        const ggml_custom3_op_f32_t fun) {
-    switch (a->type) {
-        case GGML_TYPE_F32:
-            {
-                ggml_compute_forward_map_custom3_f32(params, a, b, c, dst, fun);
-            } break;
-        default:
-            {
-                GGML_ASSERT(false);
-            } break;
+              struct ggml_tensor * dst) {
+    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+        return;
      }
+
+    struct ggml_map_custom3_op_params * p = (struct ggml_map_custom3_op_params *) dst->op_params;
+
+    p->fun(dst, a, b, c, params->ith, params->nth, p->userdata);
  }
  
  // ggml_compute_forward_cross_entropy_loss
@@ -14838,25 +15014,40 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
                  ggml_compute_forward_map_binary(params, tensor->src[0], tensor->src[1], tensor, fun);
              }
              break;
-        case GGML_OP_MAP_CUSTOM1:
+        case GGML_OP_MAP_CUSTOM1_F32:
              {
                  ggml_custom1_op_f32_t fun;
                  memcpy(&fun, tensor->op_params, sizeof(fun));
-                ggml_compute_forward_map_custom1(params, tensor->src[0], tensor, fun);
+                ggml_compute_forward_map_custom1_f32(params, tensor->src[0], tensor, fun);
              }
              break;
-        case GGML_OP_MAP_CUSTOM2:
+        case GGML_OP_MAP_CUSTOM2_F32:
              {
                  ggml_custom2_op_f32_t fun;
                  memcpy(&fun, tensor->op_params, sizeof(fun));
-                ggml_compute_forward_map_custom2(params, tensor->src[0], tensor->src[1], tensor, fun);
+                ggml_compute_forward_map_custom2_f32(params, tensor->src[0], tensor->src[1], tensor, fun);
              }
              break;
-        case GGML_OP_MAP_CUSTOM3:
+        case GGML_OP_MAP_CUSTOM3_F32:
              {
                  ggml_custom3_op_f32_t fun;
                  memcpy(&fun, tensor->op_params, sizeof(fun));
-                ggml_compute_forward_map_custom3(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor, fun);
+                ggml_compute_forward_map_custom3_f32(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor, fun);
+            }
+            break;
+        case GGML_OP_MAP_CUSTOM1:
+            {
+                ggml_compute_forward_map_custom1(params, tensor->src[0], tensor);
+            }
+            break;
+        case GGML_OP_MAP_CUSTOM2:
+            {
+                ggml_compute_forward_map_custom2(params, tensor->src[0], tensor->src[1], tensor);
+            }
+            break;
+        case GGML_OP_MAP_CUSTOM3:
+            {
+                ggml_compute_forward_map_custom3(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor);
              }
              break;
          case GGML_OP_CROSS_ENTROPY_LOSS:
@@ -15664,6 +15855,9 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
              } break;
          case GGML_OP_MAP_UNARY:
          case GGML_OP_MAP_BINARY:
+        case GGML_OP_MAP_CUSTOM1_F32:
+        case GGML_OP_MAP_CUSTOM2_F32:
+        case GGML_OP_MAP_CUSTOM3_F32:
          case GGML_OP_MAP_CUSTOM1:
          case GGML_OP_MAP_CUSTOM2:
          case GGML_OP_MAP_CUSTOM3:
@@ -16449,11 +16643,38 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
              case GGML_OP_WIN_UNPART:
              case GGML_OP_MAP_UNARY:
              case GGML_OP_MAP_BINARY:
+            case GGML_OP_MAP_CUSTOM1_F32:
+            case GGML_OP_MAP_CUSTOM2_F32:
+            case GGML_OP_MAP_CUSTOM3_F32:
+                {
+                    n_tasks = 1;
+                } break;
              case GGML_OP_MAP_CUSTOM1:
+                {
+                    struct ggml_map_custom1_op_params * p = (struct ggml_map_custom1_op_params *) node->op_params;
+                    if (p->n_tasks == GGML_N_TASKS_MAX) {
+                        n_tasks = n_threads;
+                    } else {
+                        n_tasks = MIN(p->n_tasks, n_threads);
+                    }
+                } break;
              case GGML_OP_MAP_CUSTOM2:
+                {
+                    struct ggml_map_custom2_op_params * p = (struct ggml_map_custom2_op_params *) node->op_params;
+                    if (p->n_tasks == GGML_N_TASKS_MAX) {
+                        n_tasks = n_threads;
+                    } else {
+                        n_tasks = MIN(p->n_tasks, n_threads);
+                    }
+                } break;
              case GGML_OP_MAP_CUSTOM3:
                  {
-                    n_tasks = 1;
+                    struct ggml_map_custom3_op_params * p = (struct ggml_map_custom3_op_params *) node->op_params;
+                    if (p->n_tasks == GGML_N_TASKS_MAX) {
+                        n_tasks = n_threads;
+                    } else {
+                        n_tasks = MIN(p->n_tasks, n_threads);
+                    }
                  } break;
              case GGML_OP_CROSS_ENTROPY_LOSS:
                  {
diff --git a/ggml.h b/ggml.h

index aba92480c833c8394cfae8c4affde523d23401e6..bdbd12800433242dec1e6dac8c96875540dd19e7 100644 (file)
--- a/ggml.h
+++ b/ggml.h
@@ -183,6 +183,15 @@
  #    define GGML_API
  #endif
  
+// GGML_DEPRECATED(func, hint): wraps declaration func with a compiler deprecation attribute carrying the message hint
+#ifdef __GNUC__ // TODO: support for clang
+#    define GGML_DEPRECATED(func, hint) func __attribute__((deprecated(hint)))
+#elif defined(_MSC_VER)
+#    define GGML_DEPRECATED(func, hint) __declspec(deprecated(hint)) func
+#else // any other compiler: expands to the bare declaration, no attribute attached
+#    define GGML_DEPRECATED(func, hint) func
+#endif
+
  #include <stdint.h>
  #include <stddef.h>
  #include <stdbool.h>
@@ -374,6 +383,10 @@ extern "C" {
          GGML_OP_MAP_UNARY,
          GGML_OP_MAP_BINARY,
  
+        GGML_OP_MAP_CUSTOM1_F32,
+        GGML_OP_MAP_CUSTOM2_F32,
+        GGML_OP_MAP_CUSTOM3_F32,
+
          GGML_OP_MAP_CUSTOM1,
          GGML_OP_MAP_CUSTOM2,
          GGML_OP_MAP_CUSTOM3,
@@ -570,6 +583,8 @@ extern "C" {
      GGML_API bool ggml_is_contiguous(const struct ggml_tensor * tensor);
      GGML_API bool ggml_is_permuted  (const struct ggml_tensor * tensor);
  
+    GGML_API bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
+
      // use this to compute the memory overhead of a tensor
      GGML_API size_t ggml_tensor_overhead(void);
  
@@ -1240,7 +1255,7 @@ extern "C" {
  
      // conv_1d with padding = half
      // alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
-    GGML_API struct ggml_tensor* ggml_conv_1d_ph(
+    GGML_API struct ggml_tensor * ggml_conv_1d_ph(
              struct ggml_context * ctx,
              struct ggml_tensor  * a,
              struct ggml_tensor  * b,
@@ -1253,7 +1268,7 @@ extern "C" {
          GGML_OP_POOL_COUNT,
      };
  
-    GGML_API struct ggml_tensor* ggml_pool_1d(
+    GGML_API struct ggml_tensor * ggml_pool_1d(
              struct ggml_context * ctx,
              struct ggml_tensor  * a,
              enum ggml_op_pool     op,
@@ -1261,7 +1276,7 @@ extern "C" {
              int                   s0, // stride
              int                   p0); // padding
  
-    GGML_API struct ggml_tensor* ggml_pool_2d(
+    GGML_API struct ggml_tensor * ggml_pool_2d(
              struct ggml_context * ctx,
              struct ggml_tensor  * a,
              enum ggml_op_pool     op,
@@ -1315,15 +1330,6 @@ extern "C" {
              int                   h0,
              int                   w);
  
-    // custom operators
-
-    typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
-    typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
-
-    typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *);
-    typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
-    typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
-
      GGML_API struct ggml_tensor * ggml_unary(
              struct ggml_context * ctx,
               struct ggml_tensor * a,
@@ -1334,63 +1340,138 @@ extern "C" {
          struct ggml_tensor  * a,
          enum ggml_unary_op op);
  
-    GGML_API struct ggml_tensor * ggml_map_unary_f32(
+    // custom operators
+
+    typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
+    typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
+
+    typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *);
+    typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
+    typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
+
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_unary_f32(
              struct ggml_context        * ctx,
              struct ggml_tensor         * a,
-                   ggml_unary_op_f32_t   fun);
+                   ggml_unary_op_f32_t   fun),
+        "use ggml_map_custom1 instead");
  
-    GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32(
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32(
              struct ggml_context        * ctx,
              struct ggml_tensor         * a,
-                   ggml_unary_op_f32_t   fun);
+                   ggml_unary_op_f32_t   fun),
+        "use ggml_map_custom1_inplace instead");
  
-    GGML_API struct ggml_tensor * ggml_map_binary_f32(
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_binary_f32(
              struct ggml_context         * ctx,
              struct ggml_tensor          * a,
              struct ggml_tensor          * b,
-                   ggml_binary_op_f32_t   fun);
+                   ggml_binary_op_f32_t   fun),
+        "use ggml_map_custom2 instead");
  
-    GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32(
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32(
              struct ggml_context         * ctx,
              struct ggml_tensor          * a,
              struct ggml_tensor          * b,
-                   ggml_binary_op_f32_t   fun);
+                   ggml_binary_op_f32_t   fun),
+        "use ggml_map_custom2_inplace instead");
  
-    GGML_API struct ggml_tensor * ggml_map_custom1_f32(
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom1_f32(
              struct ggml_context          * ctx,
              struct ggml_tensor           * a,
-                   ggml_custom1_op_f32_t   fun);
+                   ggml_custom1_op_f32_t   fun),
+        "use ggml_map_custom1 instead");
  
-    GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32(
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32(
              struct ggml_context          * ctx,
              struct ggml_tensor           * a,
-                   ggml_custom1_op_f32_t   fun);
+                   ggml_custom1_op_f32_t   fun),
+        "use ggml_map_custom1_inplace instead");
  
-    GGML_API struct ggml_tensor * ggml_map_custom2_f32(
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom2_f32(
              struct ggml_context          * ctx,
              struct ggml_tensor           * a,
              struct ggml_tensor           * b,
-                   ggml_custom2_op_f32_t   fun);
+                   ggml_custom2_op_f32_t   fun),
+        "use ggml_map_custom2 instead");
  
-    GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32(
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32(
              struct ggml_context          * ctx,
              struct ggml_tensor           * a,
              struct ggml_tensor           * b,
-                   ggml_custom2_op_f32_t   fun);
+                   ggml_custom2_op_f32_t   fun),
+        "use ggml_map_custom2_inplace instead");
  
-    GGML_API struct ggml_tensor * ggml_map_custom3_f32(
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom3_f32(
              struct ggml_context          * ctx,
              struct ggml_tensor           * a,
              struct ggml_tensor           * b,
              struct ggml_tensor           * c,
-                   ggml_custom3_op_f32_t   fun);
+                   ggml_custom3_op_f32_t   fun),
+        "use ggml_map_custom3 instead");
  
-    GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32(
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32(
              struct ggml_context          * ctx,
              struct ggml_tensor           * a,
              struct ggml_tensor           * b,
              struct ggml_tensor           * c,
-                   ggml_custom3_op_f32_t   fun);
+                   ggml_custom3_op_f32_t   fun),
+        "use ggml_map_custom3_inplace instead");
+
+    // custom operators v2: fun(dst, sources..., ith, nth, userdata); ith/nth are forwarded from the compute params, userdata is the pointer given to ggml_map_customN
+
+    typedef void (*ggml_custom1_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, int ith, int nth, void * userdata);
+    typedef void (*ggml_custom2_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, int ith, int nth, void * userdata);
+    typedef void (*ggml_custom3_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, const struct ggml_tensor * c, int ith, int nth, void * userdata);
+
+    #define GGML_N_TASKS_MAX (-1) // sentinel n_tasks value: ggml_graph_plan then uses n_threads tasks; parenthesized so it expands safely inside any expression
+
+    GGML_API struct ggml_tensor * ggml_map_custom1(
+            struct ggml_context   * ctx,
+            struct ggml_tensor    * a,
+            ggml_custom1_op_t       fun,
+            int                     n_tasks,
+            void                  * userdata);
+
+    GGML_API struct ggml_tensor * ggml_map_custom1_inplace(
+            struct ggml_context   * ctx,
+            struct ggml_tensor    * a,
+            ggml_custom1_op_t       fun,
+            int                     n_tasks,
+            void                  * userdata);
+
+    GGML_API struct ggml_tensor * ggml_map_custom2(
+            struct ggml_context   * ctx,
+            struct ggml_tensor    * a,
+            struct ggml_tensor    * b,
+            ggml_custom2_op_t       fun,
+            int                     n_tasks,
+            void                  * userdata);
+
+    GGML_API struct ggml_tensor * ggml_map_custom2_inplace(
+            struct ggml_context   * ctx,
+            struct ggml_tensor    * a,
+            struct ggml_tensor    * b,
+            ggml_custom2_op_t       fun,
+            int                     n_tasks,
+            void                  * userdata);
+
+    GGML_API struct ggml_tensor * ggml_map_custom3(
+            struct ggml_context   * ctx,
+            struct ggml_tensor    * a,
+            struct ggml_tensor    * b,
+            struct ggml_tensor    * c,
+            ggml_custom3_op_t       fun,
+            int                     n_tasks,
+            void                  * userdata);
+
+    GGML_API struct ggml_tensor * ggml_map_custom3_inplace(
+            struct ggml_context   * ctx,
+            struct ggml_tensor    * a,
+            struct ggml_tensor    * b,
+            struct ggml_tensor    * c,
+            ggml_custom3_op_t       fun,
+            int                     n_tasks,
+            void                  * userdata);
  
      // loss function
author	Georgi Gerganov <redacted>
	Mon, 7 Aug 2023 10:20:09 +0000 (13:20 +0300)
committer	GitHub <redacted>
	Mon, 7 Aug 2023 10:20:09 +0000 (13:20 +0300)