GGML_OP_UNARY,
- GGML_OP_MAP_UNARY,
- GGML_OP_MAP_BINARY,
-
- GGML_OP_MAP_CUSTOM1_F32,
- GGML_OP_MAP_CUSTOM2_F32,
- GGML_OP_MAP_CUSTOM3_F32,
-
GGML_OP_MAP_CUSTOM1,
GGML_OP_MAP_CUSTOM2,
GGML_OP_MAP_CUSTOM3,
+ GGML_OP_CUSTOM,
+
GGML_OP_CROSS_ENTROPY_LOSS,
GGML_OP_CROSS_ENTROPY_LOSS_BACK,
GGML_OP_OPT_STEP_ADAMW,
// custom operators
- typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
- typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
-
- typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *);
- typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
- typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
-
- GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_unary_f32(
- struct ggml_context * ctx,
- struct ggml_tensor * a,
- ggml_unary_op_f32_t fun),
- "use ggml_map_custom1 instead");
-
- GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32(
- struct ggml_context * ctx,
- struct ggml_tensor * a,
- ggml_unary_op_f32_t fun),
- "use ggml_map_custom1_inplace instead");
-
- GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_binary_f32(
- struct ggml_context * ctx,
- struct ggml_tensor * a,
- struct ggml_tensor * b,
- ggml_binary_op_f32_t fun),
- "use ggml_map_custom2 instead");
-
- GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32(
- struct ggml_context * ctx,
- struct ggml_tensor * a,
- struct ggml_tensor * b,
- ggml_binary_op_f32_t fun),
- "use ggml_map_custom2_inplace instead");
-
- GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom1_f32(
- struct ggml_context * ctx,
- struct ggml_tensor * a,
- ggml_custom1_op_f32_t fun),
- "use ggml_map_custom1 instead");
-
- GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32(
- struct ggml_context * ctx,
- struct ggml_tensor * a,
- ggml_custom1_op_f32_t fun),
- "use ggml_map_custom1_inplace instead");
-
- GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom2_f32(
- struct ggml_context * ctx,
- struct ggml_tensor * a,
- struct ggml_tensor * b,
- ggml_custom2_op_f32_t fun),
- "use ggml_map_custom2 instead");
-
- GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32(
- struct ggml_context * ctx,
- struct ggml_tensor * a,
- struct ggml_tensor * b,
- ggml_custom2_op_f32_t fun),
- "use ggml_map_custom2_inplace instead");
-
- GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom3_f32(
- struct ggml_context * ctx,
- struct ggml_tensor * a,
- struct ggml_tensor * b,
- struct ggml_tensor * c,
- ggml_custom3_op_f32_t fun),
- "use ggml_map_custom3 instead");
-
- GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32(
- struct ggml_context * ctx,
- struct ggml_tensor * a,
- struct ggml_tensor * b,
- struct ggml_tensor * c,
- ggml_custom3_op_f32_t fun),
- "use ggml_map_custom3_inplace instead");
-
- // custom operators v2
-
typedef void (*ggml_custom1_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, int ith, int nth, void * userdata);
typedef void (*ggml_custom2_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, int ith, int nth, void * userdata);
typedef void (*ggml_custom3_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, const struct ggml_tensor * c, int ith, int nth, void * userdata);
int n_tasks,
void * userdata);
+ typedef void (*ggml_custom_op_t)(struct ggml_tensor * dst , int ith, int nth, void * userdata);
+
+ GGML_API struct ggml_tensor * ggml_custom_4d(
+ struct ggml_context * ctx,
+ enum ggml_type type,
+ int64_t ne0,
+ int64_t ne1,
+ int64_t ne2,
+ int64_t ne3,
+ struct ggml_tensor ** args,
+ int n_args,
+ ggml_custom_op_t fun,
+ int n_tasks,
+ void * userdata);
+
+ GGML_API struct ggml_tensor * ggml_custom_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor ** args,
+ int n_args,
+ ggml_custom_op_t fun,
+ int n_tasks,
+ void * userdata);
+
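For reference (not part of the patch): a minimal sketch of how the new single-callback API is meant to be used, pieced together from the declarations above and the test added at the end of this diff. The callback name my_fill and the tensors ctx / a are placeholders; the callback receives only dst plus the thread index/count, and the node's arguments are read back from dst->src[]:

    // hypothetical callback matching the new ggml_custom_op_t signature;
    // work is split across nth threads by striding over the elements
    static void my_fill(struct ggml_tensor * dst, int ith, int nth, void * userdata) {
        const struct ggml_tensor * src0 = dst->src[0];      // first argument passed via args[]
        const float * src_data = ggml_get_data_f32(src0);
        float       * dst_data = ggml_get_data_f32(dst);
        for (int64_t i = ith; i < ggml_nelements(dst); i += nth) {
            dst_data[i] = 2.0f*src_data[i];
        }
        (void) userdata;
    }

    // build the node: the output type and shape are given explicitly, the inputs go in args[]
    struct ggml_tensor * args[] = { a };
    struct ggml_tensor * out = ggml_custom_4d(ctx, GGML_TYPE_F32,
            a->ne[0], a->ne[1], a->ne[2], a->ne[3],
            args, 1, my_fill, GGML_N_TASKS_MAX, /*userdata=*/NULL);

Unlike ggml_map_custom1/2/3, the output type and shape are specified explicitly, so dst does not have to match any of the inputs; the test added at the end of this diff builds an I32 output from five F32 inputs.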
// loss function
GGML_API struct ggml_tensor * ggml_cross_entropy_loss(
{
ggml_compute_forward_rwkv_wkv7(params, tensor);
} break;
- case GGML_OP_MAP_UNARY:
- {
- ggml_unary_op_f32_t fun;
- memcpy(&fun, tensor->op_params, sizeof(fun));
- ggml_compute_forward_map_unary(params, tensor, fun);
- }
- break;
- case GGML_OP_MAP_BINARY:
- {
- ggml_binary_op_f32_t fun;
- memcpy(&fun, tensor->op_params, sizeof(fun));
- ggml_compute_forward_map_binary(params, tensor, fun);
- }
- break;
- case GGML_OP_MAP_CUSTOM1_F32:
- {
- ggml_custom1_op_f32_t fun;
- memcpy(&fun, tensor->op_params, sizeof(fun));
- ggml_compute_forward_map_custom1_f32(params, tensor, fun);
- }
- break;
- case GGML_OP_MAP_CUSTOM2_F32:
- {
- ggml_custom2_op_f32_t fun;
- memcpy(&fun, tensor->op_params, sizeof(fun));
- ggml_compute_forward_map_custom2_f32(params, tensor, fun);
- }
- break;
- case GGML_OP_MAP_CUSTOM3_F32:
- {
- ggml_custom3_op_f32_t fun;
- memcpy(&fun, tensor->op_params, sizeof(fun));
- ggml_compute_forward_map_custom3_f32(params, tensor, fun);
- }
- break;
case GGML_OP_MAP_CUSTOM1:
{
ggml_compute_forward_map_custom1(params, tensor);
ggml_compute_forward_map_custom3(params, tensor);
}
break;
+ case GGML_OP_CUSTOM:
+ {
+ ggml_compute_forward_custom(params, tensor);
+ }
+ break;
case GGML_OP_CROSS_ENTROPY_LOSS:
{
ggml_compute_forward_cross_entropy_loss(params, tensor);
case GGML_OP_WIN_PART:
case GGML_OP_WIN_UNPART:
case GGML_OP_GET_REL_POS:
- case GGML_OP_MAP_UNARY:
- case GGML_OP_MAP_BINARY:
- case GGML_OP_MAP_CUSTOM1_F32:
- case GGML_OP_MAP_CUSTOM2_F32:
- case GGML_OP_MAP_CUSTOM3_F32:
{
n_tasks = 1;
} break;
n_tasks = MIN(p.n_tasks, n_threads);
}
} break;
+ case GGML_OP_CUSTOM:
+ {
+ struct ggml_custom_op_params p;
+ memcpy(&p, node->op_params, sizeof(p));
+ if (p.n_tasks == GGML_N_TASKS_MAX) {
+ n_tasks = n_threads;
+ } else {
+ n_tasks = MIN(p.n_tasks, n_threads);
+ }
+ } break;
case GGML_OP_CROSS_ENTROPY_LOSS:
case GGML_OP_CROSS_ENTROPY_LOSS_BACK:
case GGML_OP_OPT_STEP_ADAMW:
}
}
-// ggml_compute_forward_map_unary
-
-static void ggml_compute_forward_map_unary_f32(
- const ggml_compute_params * params,
- ggml_tensor * dst,
- const ggml_unary_op_f32_t fun) {
-
- const ggml_tensor * src0 = dst->src[0];
-
- if (params->ith != 0) {
- return;
- }
-
- assert(ggml_is_contiguous_1(src0));
- assert(ggml_is_contiguous_1(dst));
- assert(ggml_are_same_shape(src0, dst));
-
- const int n = ggml_nrows(src0);
- const int nc = src0->ne[0];
-
- for (int i = 0; i < n; i++) {
- fun(nc,
- (float *) ((char *) dst->data + i*( dst->nb[1])),
- (float *) ((char *) src0->data + i*(src0->nb[1])));
- }
-}
-
-void ggml_compute_forward_map_unary(
- const ggml_compute_params * params,
- ggml_tensor * dst,
- const ggml_unary_op_f32_t fun) {
-
- const ggml_tensor * src0 = dst->src[0];
-
- switch (src0->type) {
- case GGML_TYPE_F32:
- {
- ggml_compute_forward_map_unary_f32(params, dst, fun);
- } break;
- default:
- {
- GGML_ABORT("fatal error");
- }
- }
-}
-
-// ggml_compute_forward_map_binary
-
-static void ggml_compute_forward_map_binary_f32(
- const ggml_compute_params * params,
- ggml_tensor * dst,
- const ggml_binary_op_f32_t fun) {
-
- const ggml_tensor * src0 = dst->src[0];
- const ggml_tensor * src1 = dst->src[1];
-
- if (params->ith != 0) {
- return;
- }
-
- assert(ggml_is_contiguous_1(src0));
- assert(ggml_is_contiguous_1(src1));
- assert(ggml_is_contiguous_1(dst));
- assert(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
-
- const int n = ggml_nrows(src0);
- const int nc = src0->ne[0];
-
- for (int i = 0; i < n; i++) {
- fun(nc,
- (float *) ((char *) dst->data + i*( dst->nb[1])),
- (float *) ((char *) src0->data + i*(src0->nb[1])),
- (float *) ((char *) src1->data + i*(src1->nb[1])));
- }
-}
-
-void ggml_compute_forward_map_binary(
- const ggml_compute_params * params,
- ggml_tensor * dst,
- const ggml_binary_op_f32_t fun) {
-
- const ggml_tensor * src0 = dst->src[0];
-
- switch (src0->type) {
- case GGML_TYPE_F32:
- {
- ggml_compute_forward_map_binary_f32(params, dst, fun);
- } break;
- default:
- {
- GGML_ABORT("fatal error");
- }
- }
-}
-
-// ggml_compute_forward_map_custom1
-
-void ggml_compute_forward_map_custom1_f32(
- const ggml_compute_params * params,
- ggml_tensor * dst,
- const ggml_custom1_op_f32_t fun) {
-
- const ggml_tensor * a = dst->src[0];
-
- if (params->ith != 0) {
- return;
- }
-
- fun(dst, a);
-}
-
-// ggml_compute_forward_map_custom2
-
-void ggml_compute_forward_map_custom2_f32(
- const ggml_compute_params * params,
- ggml_tensor * dst,
- const ggml_custom2_op_f32_t fun) {
-
- const ggml_tensor * a = dst->src[0];
- const ggml_tensor * b = dst->src[1];
-
- if (params->ith != 0) {
- return;
- }
-
- fun(dst, a, b);
-}
-
-// ggml_compute_forward_map_custom3
-
-void ggml_compute_forward_map_custom3_f32(
- const ggml_compute_params * params,
- ggml_tensor * dst,
- const ggml_custom3_op_f32_t fun) {
-
- const ggml_tensor * a = dst->src[0];
- const ggml_tensor * b = dst->src[1];
- const ggml_tensor * c = dst->src[1];
-
- if (params->ith != 0) {
- return;
- }
-
- fun(dst, a, b, c);
-}
-
// ggml_compute_forward_map_custom1
void ggml_compute_forward_map_custom1(
p.fun(dst, a, b, c, params->ith, params->nth, p.userdata);
}
+// ggml_compute_forward_custom
+
+void ggml_compute_forward_custom(
+ const struct ggml_compute_params * params,
+ struct ggml_tensor * dst) {
+
+ struct ggml_custom_op_params p;
+ memcpy(&p, dst->op_params, sizeof(p));
+
+ p.fun(dst, params->ith, params->nth, p.userdata);
+}
+
// ggml_compute_forward_cross_entropy_loss
static void ggml_compute_forward_cross_entropy_loss_f32(
void ggml_compute_forward_rwkv_wkv6(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_rwkv_wkv7(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_gla(const struct ggml_compute_params * params, struct ggml_tensor * dst);
-void ggml_compute_forward_map_unary(
- const struct ggml_compute_params * params,
- struct ggml_tensor * dst,
- const ggml_unary_op_f32_t fun);
-void ggml_compute_forward_map_binary(
- const struct ggml_compute_params * params,
- struct ggml_tensor * dst,
- const ggml_binary_op_f32_t fun);
-void ggml_compute_forward_map_custom1_f32(
- const struct ggml_compute_params * params,
- struct ggml_tensor * dst,
- const ggml_custom1_op_f32_t fun);
-void ggml_compute_forward_map_custom2_f32(
- const struct ggml_compute_params * params,
- struct ggml_tensor * dst,
- const ggml_custom2_op_f32_t fun);
-void ggml_compute_forward_map_custom3_f32(
- const struct ggml_compute_params * params,
- struct ggml_tensor * dst,
- const ggml_custom3_op_f32_t fun);
void ggml_compute_forward_map_custom1(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_map_custom2(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_map_custom3(const struct ggml_compute_params * params, struct ggml_tensor * dst);
+void ggml_compute_forward_custom(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_cross_entropy_loss(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_cross_entropy_loss_back(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_opt_step_adamw(const struct ggml_compute_params * params, struct ggml_tensor * dst);
struct ggml_map_custom3_op_params {
ggml_custom3_op_t fun;
- int n_tasks;
- void * userdata;
+ int n_tasks;
+ void * userdata;
+};
+
+struct ggml_custom_op_params {
+ ggml_custom_op_t fun;
+ int n_tasks;
+ void * userdata;
};
// bitset
"UNARY",
- "MAP_UNARY",
- "MAP_BINARY",
-
- "MAP_CUSTOM1_F32",
- "MAP_CUSTOM2_F32",
- "MAP_CUSTOM3_F32",
-
"MAP_CUSTOM1",
"MAP_CUSTOM2",
"MAP_CUSTOM3",
+ "CUSTOM",
+
"CROSS_ENTROPY_LOSS",
"CROSS_ENTROPY_LOSS_BACK",
"OPT_STEP_ADAMW",
};
-static_assert(GGML_OP_COUNT == 85, "GGML_OP_COUNT != 85");
+static_assert(GGML_OP_COUNT == 81, "GGML_OP_COUNT != 81");
static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
"none",
"unary(x)",
- "f(x)",
- "f(x,y)",
-
- "custom_f32(x)",
- "custom_f32(x,y)",
- "custom_f32(x,y,z)",
+ "map_custom(x)",
+ "map_custom(x,y)",
+ "map_custom(x,y,z)",
"custom(x)",
- "custom(x,y)",
- "custom(x,y,z)",
"cross_entropy_loss(x,y)",
"cross_entropy_loss_back(x,y)",
"adamw(x)",
};
-static_assert(GGML_OP_COUNT == 85, "GGML_OP_COUNT != 85");
+static_assert(GGML_OP_COUNT == 81, "GGML_OP_COUNT != 81");
static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
return ggml_unary_impl(ctx, a, op, true);
}
-// ggml_map_unary
-
-static struct ggml_tensor * ggml_map_unary_impl_f32(
- struct ggml_context * ctx,
- struct ggml_tensor * a,
- const ggml_unary_op_f32_t fun,
- bool inplace) {
- struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
-
- ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
-
- result->op = GGML_OP_MAP_UNARY;
- result->src[0] = a;
-
- return result;
-}
-
-struct ggml_tensor * ggml_map_unary_f32(
- struct ggml_context * ctx,
- struct ggml_tensor * a,
- const ggml_unary_op_f32_t fun) {
- return ggml_map_unary_impl_f32(ctx, a, fun, false);
-}
-
-struct ggml_tensor * ggml_map_unary_inplace_f32(
- struct ggml_context * ctx,
- struct ggml_tensor * a,
- const ggml_unary_op_f32_t fun) {
- return ggml_map_unary_impl_f32(ctx, a, fun, true);
-}
-
-// ggml_map_binary
-
-static struct ggml_tensor * ggml_map_binary_impl_f32(
- struct ggml_context * ctx,
- struct ggml_tensor * a,
- struct ggml_tensor * b,
- const ggml_binary_op_f32_t fun,
- bool inplace) {
- GGML_ASSERT(ggml_are_same_shape(a, b));
-
- struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
-
- ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
-
- result->op = GGML_OP_MAP_BINARY;
- result->src[0] = a;
- result->src[1] = b;
-
- return result;
-}
-
-struct ggml_tensor * ggml_map_binary_f32(
- struct ggml_context * ctx,
- struct ggml_tensor * a,
- struct ggml_tensor * b,
- const ggml_binary_op_f32_t fun) {
- return ggml_map_binary_impl_f32(ctx, a, b, fun, false);
-}
-
-struct ggml_tensor * ggml_map_binary_inplace_f32(
- struct ggml_context * ctx,
- struct ggml_tensor * a,
- struct ggml_tensor * b,
- const ggml_binary_op_f32_t fun) {
- return ggml_map_binary_impl_f32(ctx, a, b, fun, true);
-}
-
-// ggml_map_custom1_f32
-
-static struct ggml_tensor * ggml_map_custom1_impl_f32(
- struct ggml_context * ctx,
- struct ggml_tensor * a,
- const ggml_custom1_op_f32_t fun,
- bool inplace) {
- struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
-
- ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
-
- result->op = GGML_OP_MAP_CUSTOM1_F32;
- result->src[0] = a;
-
- return result;
-}
-
-struct ggml_tensor * ggml_map_custom1_f32(
- struct ggml_context * ctx,
- struct ggml_tensor * a,
- const ggml_custom1_op_f32_t fun) {
- return ggml_map_custom1_impl_f32(ctx, a, fun, false);
-}
-
-struct ggml_tensor * ggml_map_custom1_inplace_f32(
- struct ggml_context * ctx,
- struct ggml_tensor * a,
- const ggml_custom1_op_f32_t fun) {
- return ggml_map_custom1_impl_f32(ctx, a, fun, true);
-}
-
-// ggml_map_custom2_f32
-
-static struct ggml_tensor * ggml_map_custom2_impl_f32(
- struct ggml_context * ctx,
- struct ggml_tensor * a,
- struct ggml_tensor * b,
- const ggml_custom2_op_f32_t fun,
- bool inplace) {
- struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
-
- ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
-
- result->op = GGML_OP_MAP_CUSTOM2_F32;
- result->src[0] = a;
- result->src[1] = b;
-
- return result;
-}
-
-struct ggml_tensor * ggml_map_custom2_f32(
- struct ggml_context * ctx,
- struct ggml_tensor * a,
- struct ggml_tensor * b,
- const ggml_custom2_op_f32_t fun) {
- return ggml_map_custom2_impl_f32(ctx, a, b, fun, false);
-}
-
-struct ggml_tensor * ggml_map_custom2_inplace_f32(
- struct ggml_context * ctx,
- struct ggml_tensor * a,
- struct ggml_tensor * b,
- const ggml_custom2_op_f32_t fun) {
- return ggml_map_custom2_impl_f32(ctx, a, b, fun, true);
-}
-
-// ggml_map_custom3_f32
-
-static struct ggml_tensor * ggml_map_custom3_impl_f32(
- struct ggml_context * ctx,
- struct ggml_tensor * a,
- struct ggml_tensor * b,
- struct ggml_tensor * c,
- const ggml_custom3_op_f32_t fun,
- bool inplace) {
- struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
-
- ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
-
- result->op = GGML_OP_MAP_CUSTOM3_F32;
- result->src[0] = a;
- result->src[1] = b;
- result->src[2] = c;
-
- return result;
-}
-
-struct ggml_tensor * ggml_map_custom3_f32(
- struct ggml_context * ctx,
- struct ggml_tensor * a,
- struct ggml_tensor * b,
- struct ggml_tensor * c,
- const ggml_custom3_op_f32_t fun) {
- return ggml_map_custom3_impl_f32(ctx, a, b, c, fun, false);
-}
-
-struct ggml_tensor * ggml_map_custom3_inplace_f32(
- struct ggml_context * ctx,
- struct ggml_tensor * a,
- struct ggml_tensor * b,
- struct ggml_tensor * c,
- const ggml_custom3_op_f32_t fun) {
- return ggml_map_custom3_impl_f32(ctx, a, b, c, fun, true);
-}
-
// ggml_map_custom1
static struct ggml_tensor * ggml_map_custom1_impl(
/*.n_tasks =*/ n_tasks,
/*.userdata =*/ userdata
};
- ggml_set_op_params(result, (const void *) &params, sizeof(params));
+ ggml_set_op_params(result, &params, sizeof(params));
result->op = GGML_OP_MAP_CUSTOM1;
result->src[0] = a;
/*.n_tasks =*/ n_tasks,
/*.userdata =*/ userdata
};
- ggml_set_op_params(result, (const void *) &params, sizeof(params));
+ ggml_set_op_params(result, &params, sizeof(params));
result->op = GGML_OP_MAP_CUSTOM2;
result->src[0] = a;
/*.n_tasks =*/ n_tasks,
/*.userdata =*/ userdata
};
- ggml_set_op_params(result, (const void *) &params, sizeof(params));
+ ggml_set_op_params(result, &params, sizeof(params));
result->op = GGML_OP_MAP_CUSTOM3;
result->src[0] = a;
return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, true);
}
+struct ggml_tensor * ggml_custom_4d(
+ struct ggml_context * ctx,
+ enum ggml_type type,
+ int64_t ne0,
+ int64_t ne1,
+ int64_t ne2,
+ int64_t ne3,
+ struct ggml_tensor ** args,
+ int n_args,
+ ggml_custom_op_t fun,
+ int n_tasks,
+ void * userdata) {
+
+ GGML_ASSERT(n_args < GGML_MAX_SRC);
+
+ struct ggml_tensor * result = ggml_new_tensor_4d(ctx, type, ne0, ne1, ne2, ne3);
+
+ struct ggml_custom_op_params params = {
+ /*.fun =*/ fun,
+ /*.n_tasks =*/ n_tasks,
+ /*.userdata =*/ userdata
+ };
+ ggml_set_op_params(result, &params, sizeof(params));
+
+ result->op = GGML_OP_CUSTOM;
+ for (int i = 0; i < n_args; i++) {
+ result->src[i] = args[i];
+ }
+
+ return result;
+}
+
+struct ggml_tensor * ggml_custom_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor ** args,
+ int n_args,
+ ggml_custom_op_t fun,
+ int n_tasks,
+ void * userdata) {
+
+ GGML_ASSERT(n_args < GGML_MAX_SRC - 1);
+
+ struct ggml_tensor * result = ggml_view_tensor(ctx, a);
+
+ struct ggml_custom_op_params params = {
+ /*.fun =*/ fun,
+ /*.n_tasks =*/ n_tasks,
+ /*.userdata =*/ userdata
+ };
+ ggml_set_op_params(result, &params, sizeof(params));
+
+ result->op = GGML_OP_CUSTOM;
+ result->src[0] = a;
+ for (int i = 0; i < n_args; i++) {
+ result->src[i + 1] = args[i];
+ }
+
+ return result;
+}
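A corresponding hedged sketch for the in-place variant, with a, b and my_fill again standing in for real tensors and a real callback: the result is a view of a, so a itself appears as dst->src[0] inside the callback and the extra arguments follow at src[1..]:

    struct ggml_tensor * extra[] = { b };
    struct ggml_tensor * out = ggml_custom_inplace(ctx, a, extra, 1, my_fill, GGML_N_TASKS_MAX, NULL);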
// ggml_cross_entropy_loss
struct ggml_tensor * ggml_cross_entropy_loss(
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
-#include <assert.h>
#if defined(_WIN32)
#include <windows.h>
void custom1(struct ggml_tensor * dst , const struct ggml_tensor * a, int ith, int nth, void * userdata) {
// check that the userdata is correct
- assert(userdata == NULL);
- assert(ggml_are_same_shape(dst, a));
+ GGML_ASSERT(userdata == NULL);
+ GGML_ASSERT(ggml_are_same_shape(dst, a));
atomic_fetch_add(&g_custom1_count, 1);
float * dst_data = ggml_get_data_f32(dst);
// this assumes that the tensors are contiguous
- assert(ggml_is_contiguous(dst));
- assert(ggml_is_contiguous(a));
+ GGML_ASSERT(ggml_is_contiguous(dst));
+ GGML_ASSERT(ggml_is_contiguous(a));
// parallelize by elements
const int ne = (int)ggml_nelements(dst);
void custom2(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, int ith, int nth, void * userdata) {
// check that the userdata is correct
- assert(userdata == g_userdata);
- assert(strcmp(userdata, "ggml") == 0);
- assert(ggml_are_same_shape(dst, a));
- assert(ggml_are_same_shape(dst, b));
+ GGML_ASSERT(userdata == g_userdata);
+ GGML_ASSERT(strcmp(userdata, "ggml") == 0);
+ GGML_ASSERT(ggml_are_same_shape(dst, a));
+ GGML_ASSERT(ggml_are_same_shape(dst, b));
atomic_fetch_add(&g_custom2_count, 1);
const int nc = (int)dst->ne[0];
// this assumes that the tensors are contiguous
- assert(ggml_is_contiguous(dst));
- assert(ggml_is_contiguous(a));
- assert(ggml_is_contiguous(b));
+ GGML_ASSERT(ggml_is_contiguous(dst));
+ GGML_ASSERT(ggml_is_contiguous(a));
+ GGML_ASSERT(ggml_is_contiguous(b));
for (int ir = ir0; ir < ir1; ++ir) {
for (int ic = 0; ic < nc; ++ic) {
void custom3(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, const struct ggml_tensor * c, int ith, int nth, void * userdata) {
// check that the userdata is correct
- assert(userdata == g_userdata);
- assert(strcmp(userdata, "ggml") == 0);
- assert(ggml_are_same_shape(dst, a));
- assert(ggml_are_same_shape(dst, b));
- assert(ggml_are_same_shape(dst, c));
+ GGML_ASSERT(userdata == g_userdata);
+ GGML_ASSERT(strcmp(userdata, "ggml") == 0);
+ GGML_ASSERT(ggml_are_same_shape(dst, a));
+ GGML_ASSERT(ggml_are_same_shape(dst, b));
+ GGML_ASSERT(ggml_are_same_shape(dst, c));
atomic_fetch_add(&g_custom3_count, 1);
float * dst_data = ggml_get_data_f32(dst);
// dont parallelize
- assert(ith == 0);
+ GGML_ASSERT(ith == 0);
// number of elements
const int ne = (int)ggml_nelements(dst);
// this assumes that the tensors are contiguous
- assert(ggml_is_contiguous(dst));
- assert(ggml_is_contiguous(a));
- assert(ggml_is_contiguous(b));
- assert(ggml_is_contiguous(c));
+ GGML_ASSERT(ggml_is_contiguous(dst));
+ GGML_ASSERT(ggml_is_contiguous(a));
+ GGML_ASSERT(ggml_is_contiguous(b));
+ GGML_ASSERT(ggml_is_contiguous(c));
for (int i = 0; i < ne; ++i) {
dst_data[i] = a_data[i] + b_data[i] + c_data[i];
}
}
+void custom(struct ggml_tensor * dst, int ith, int nth, void * userdata) {
+ struct ggml_tensor * src0 = dst->src[0];
+ struct ggml_tensor * src1 = dst->src[1];
+ struct ggml_tensor * src2 = dst->src[2];
+ struct ggml_tensor * src3 = dst->src[3];
+ struct ggml_tensor * src4 = dst->src[4];
+
+ int32_t * dst_data = (int32_t *) ggml_get_data(dst);
+ const float * src0_data = ggml_get_data_f32(src0);
+ const float * src1_data = ggml_get_data_f32(src1);
+ const float * src2_data = ggml_get_data_f32(src2);
+ const float * src3_data = ggml_get_data_f32(src3);
+ const float * src4_data = ggml_get_data_f32(src4);
+
+ // check that the userdata is correct
+ GGML_ASSERT(userdata == g_userdata);
+ GGML_ASSERT(strcmp(userdata, "ggml") == 0);
+
+ // check that the tensors are contiguous
+ GGML_ASSERT(ggml_is_contiguous(dst));
+ GGML_ASSERT(ggml_is_contiguous(src0));
+ GGML_ASSERT(ggml_is_contiguous(src1));
+ GGML_ASSERT(ggml_is_contiguous(src2));
+ GGML_ASSERT(ggml_is_contiguous(src3));
+ GGML_ASSERT(ggml_is_contiguous(src4));
+
+ // check that the shapes are the same
+ GGML_ASSERT(ggml_are_same_shape(dst, src0));
+ GGML_ASSERT(ggml_are_same_shape(dst, src1));
+ GGML_ASSERT(ggml_are_same_shape(dst, src2));
+ GGML_ASSERT(ggml_are_same_shape(dst, src3));
+ GGML_ASSERT(ggml_are_same_shape(dst, src4));
+
+ for (int i = ith; i < ggml_nelements(dst); i += nth) {
+ dst_data[i] = src0_data[i] + src1_data[i] * src2_data[i] - src3_data[i] * src4_data[i];
+ }
+}
+
int main(int argc, const char** argv) {
float buf1_f32[1024];
const float * output = ggml_get_data_f32(m1);
for (int i = 0; i < ggml_nelements(m1); ++i) {
- assert(output[i] == buf1_f32[i] * 2);
+ GGML_ASSERT(output[i] == buf1_f32[i] * 2);
}
- assert(g_custom1_count == 2);
+ GGML_ASSERT(g_custom1_count == 2);
ggml_free(ctx);
}
{
struct ggml_context * ctx = make_ctx();
struct ggml_tensor * t1 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 10, 2);
- memcpy(t1->data, buf1_f32, ggml_nbytes(t1));
struct ggml_tensor * t2 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 10, 2);
+ memcpy(t1->data, buf1_f32, ggml_nbytes(t1));
memcpy(t2->data, buf2_f32, ggml_nbytes(t2));
struct ggml_tensor * m2 = ggml_map_custom2(ctx, t1, t2, custom2, GGML_N_TASKS_MAX, g_userdata);
const float * output = ggml_get_data_f32(m2);
for (int i = 0; i < ggml_nelements(m2); ++i) {
- assert(output[i] == buf1_f32[i] + buf2_f32[i]);
+ GGML_ASSERT(output[i] == buf1_f32[i] + buf2_f32[i]);
}
- assert(g_custom2_count == 4);
+ GGML_ASSERT(g_custom2_count == 4);
ggml_free(ctx);
}
{
struct ggml_context * ctx = make_ctx();
struct ggml_tensor * t1 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 10, 2);
- memcpy(t1->data, buf1_f32, ggml_nbytes(t1));
struct ggml_tensor * t2 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 10, 2);
- memcpy(t2->data, buf2_f32, ggml_nbytes(t2));
struct ggml_tensor * t3 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 10, 2);
+
+ memcpy(t1->data, buf1_f32, ggml_nbytes(t1));
+ memcpy(t2->data, buf2_f32, ggml_nbytes(t2));
memcpy(t3->data, buf3_f32, ggml_nbytes(t3));
struct ggml_tensor * m3 = ggml_map_custom3(ctx, t1, t2, t3, custom3, 1, g_userdata);
const float * output = ggml_get_data_f32(m3);
for (int i = 0; i < ggml_nelements(m3); ++i) {
- assert(output[i] == buf1_f32[i] + buf2_f32[i] + buf3_f32[i]);
+ GGML_ASSERT(output[i] == buf1_f32[i] + buf2_f32[i] + buf3_f32[i]);
}
- assert(g_custom3_count == 1);
+ GGML_ASSERT(g_custom3_count == 1);
ggml_free(ctx);
}
+ // custom
+ {
+ struct ggml_context * ctx = make_ctx();
+ struct ggml_tensor * t1 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 10, 2);
+ struct ggml_tensor * t2 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 10, 2);
+ struct ggml_tensor * t3 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 10, 2);
+ struct ggml_tensor * t4 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 10, 2);
+ struct ggml_tensor * t5 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 10, 2);
+ memcpy(t1->data, buf1_f32, ggml_nbytes(t1));
+ memcpy(t2->data, buf2_f32, ggml_nbytes(t2));
+ memcpy(t3->data, buf3_f32, ggml_nbytes(t3));
+ memcpy(t4->data, buf1_f32, ggml_nbytes(t4));
+ memcpy(t5->data, buf2_f32, ggml_nbytes(t5));
+
+ struct ggml_tensor * args[] = {
+ t1, t2, t3, t4, t5,
+ };
+
+ struct ggml_tensor * m4 = ggml_custom_4d(ctx, GGML_TYPE_I32, 10, 2, 1, 1, args, sizeof(args)/sizeof(args[0]), custom, GGML_N_TASKS_MAX, g_userdata);
+
+ struct ggml_cgraph * graph = ggml_new_graph(ctx);
+ ggml_build_forward_expand(graph, m4);
+
+ ggml_graph_compute_with_ctx(ctx, graph, 4);
+
+ const int32_t * output = (const int32_t *) ggml_get_data(m4);
+
+ for (int i = 0; i < ggml_nelements(m4); ++i) {
+ GGML_ASSERT(output[i] == buf1_f32[i] + buf2_f32[i] * buf3_f32[i] - buf1_f32[i] * buf2_f32[i]);
+ }
+
+ ggml_free(ctx);
+ }
return 0;
}