# define GGML_API
#endif
+// TODO: support for clang
+#ifdef __GNUC__
+# define GGML_DEPRECATED(func, hint) func __attribute__((deprecated(hint)))
+#elif defined(_MSC_VER)
+# define GGML_DEPRECATED(func, hint) __declspec(deprecated(hint)) func
+#else
+# define GGML_DEPRECATED(func, hint) func
+#endif
+
#include <stdint.h>
#include <stddef.h>
#include <stdbool.h>
GGML_OP_MAP_UNARY,
GGML_OP_MAP_BINARY,
+ GGML_OP_MAP_CUSTOM1_F32,
+ GGML_OP_MAP_CUSTOM2_F32,
+ GGML_OP_MAP_CUSTOM3_F32,
+
GGML_OP_MAP_CUSTOM1,
GGML_OP_MAP_CUSTOM2,
GGML_OP_MAP_CUSTOM3,
GGML_API bool ggml_is_contiguous(const struct ggml_tensor * tensor);
GGML_API bool ggml_is_permuted (const struct ggml_tensor * tensor);
+ GGML_API bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
+
// use this to compute the memory overhead of a tensor
GGML_API size_t ggml_tensor_overhead(void);
int h0,
int w);
- // custom operators
-
- typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
- typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
-
- typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *);
- typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
- typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
-
GGML_API struct ggml_tensor * ggml_unary(
struct ggml_context * ctx,
struct ggml_tensor * a,
enum ggml_unary_op op);
- GGML_API struct ggml_tensor * ggml_map_unary_f32(
+ // custom operators
+
+ typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
+ typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
+
+ typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *);
+ typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
+ typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
+
+ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_unary_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
- ggml_unary_op_f32_t fun);
+ ggml_unary_op_f32_t fun),
+ "use ggml_map_custom1 instead");
- GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32(
+ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
- ggml_unary_op_f32_t fun);
+ ggml_unary_op_f32_t fun),
+ "use ggml_map_custom1_inplace instead");
- GGML_API struct ggml_tensor * ggml_map_binary_f32(
+ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_binary_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
- ggml_binary_op_f32_t fun);
+ ggml_binary_op_f32_t fun),
+ "use ggml_map_custom2 instead");
- GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32(
+ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
- ggml_binary_op_f32_t fun);
+ ggml_binary_op_f32_t fun),
+ "use ggml_map_custom2_inplace instead");
- GGML_API struct ggml_tensor * ggml_map_custom1_f32(
+ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom1_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
- ggml_custom1_op_f32_t fun);
+ ggml_custom1_op_f32_t fun),
+ "use ggml_map_custom1 instead");
- GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32(
+ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
- ggml_custom1_op_f32_t fun);
+ ggml_custom1_op_f32_t fun),
+ "use ggml_map_custom1_inplace instead");
- GGML_API struct ggml_tensor * ggml_map_custom2_f32(
+ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom2_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
- ggml_custom2_op_f32_t fun);
+ ggml_custom2_op_f32_t fun),
+ "use ggml_map_custom2 instead");
- GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32(
+ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
- ggml_custom2_op_f32_t fun);
+ ggml_custom2_op_f32_t fun),
+ "use ggml_map_custom2_inplace instead");
- GGML_API struct ggml_tensor * ggml_map_custom3_f32(
+ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom3_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
struct ggml_tensor * c,
- ggml_custom3_op_f32_t fun);
+ ggml_custom3_op_f32_t fun),
+ "use ggml_map_custom3 instead");
- GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32(
+ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
struct ggml_tensor * c,
- ggml_custom3_op_f32_t fun);
+ ggml_custom3_op_f32_t fun),
+ "use ggml_map_custom3_inplace instead");
+
+    // custom operators v2
+
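+    // each custom operator callback receives the destination tensor, the
+    // source tensor(s), the index of the current task (ith), the total
+    // number of tasks (nth) and the userdata pointer passed to ggml_map_custom*()
+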
+    typedef void (*ggml_custom1_op_t)(struct ggml_tensor * dst, const struct ggml_tensor * a, int ith, int nth, void * userdata);
+    typedef void (*ggml_custom2_op_t)(struct ggml_tensor * dst, const struct ggml_tensor * a, const struct ggml_tensor * b, int ith, int nth, void * userdata);
+    typedef void (*ggml_custom3_op_t)(struct ggml_tensor * dst, const struct ggml_tensor * a, const struct ggml_tensor * b, const struct ggml_tensor * c, int ith, int nth, void * userdata);
+
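+    // pass GGML_N_TASKS_MAX as n_tasks to use the maximum number of threads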
+ #define GGML_N_TASKS_MAX -1
+
+ GGML_API struct ggml_tensor * ggml_map_custom1(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ ggml_custom1_op_t fun,
+ int n_tasks,
+ void * userdata);
+
+ GGML_API struct ggml_tensor * ggml_map_custom1_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ ggml_custom1_op_t fun,
+ int n_tasks,
+ void * userdata);
+
+ GGML_API struct ggml_tensor * ggml_map_custom2(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b,
+ ggml_custom2_op_t fun,
+ int n_tasks,
+ void * userdata);
+
+ GGML_API struct ggml_tensor * ggml_map_custom2_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b,
+ ggml_custom2_op_t fun,
+ int n_tasks,
+ void * userdata);
+
+ GGML_API struct ggml_tensor * ggml_map_custom3(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b,
+ struct ggml_tensor * c,
+ ggml_custom3_op_t fun,
+ int n_tasks,
+ void * userdata);
+
+ GGML_API struct ggml_tensor * ggml_map_custom3_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b,
+ struct ggml_tensor * c,
+ ggml_custom3_op_t fun,
+ int n_tasks,
+ void * userdata);
// loss function
"CROSS_ENTROPY_LOSS_BACK",
};
-static_assert(GGML_OP_COUNT == 59, "GGML_OP_COUNT != 59");
+static_assert(GGML_OP_COUNT == 62, "GGML_OP_COUNT != 62");
static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
"none",
"cross_entropy_loss_back(x,y)",
};
-static_assert(GGML_OP_COUNT == 59, "GGML_OP_COUNT != 59");
+static_assert(GGML_OP_COUNT == 62, "GGML_OP_COUNT != 62");
static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
}
-static inline bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
+bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
return
return result;
}
+static void ggml_get_op_params(const struct ggml_tensor * tensor, void * params, size_t params_size) {
+ assert(params_size <= GGML_MAX_OP_PARAMS);
+ memcpy(params, tensor->op_params, params_size);
+}
+
static void ggml_set_op_params(struct ggml_tensor * tensor, const void * params, size_t params_size) {
assert(params_size <= GGML_MAX_OP_PARAMS);
memcpy(tensor->op_params, params, params_size);
ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0),
a->ne[2], 1, 1,
};
- struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
+
+ struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
int32_t params[] = { s0, p0, d0 };
    ggml_set_op_params(result, &params, sizeof(params));
// ggml_conv_2d
-struct ggml_tensor* ggml_conv_2d(
+struct ggml_tensor * ggml_conv_2d(
struct ggml_context* ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1),
a->ne[3], b->ne[3],
};
- struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
+
+ struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
int32_t params[] = { s0, s1, p0, p1, d0, d1 };
    ggml_set_op_params(result, &params, sizeof(params));
// ggml_conv_1d_ph
-struct ggml_tensor* ggml_conv_1d_ph(
+struct ggml_tensor * ggml_conv_1d_ph(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
// ggml_pool_1d
-struct ggml_tensor* ggml_pool_1d(
+struct ggml_tensor * ggml_pool_1d(
struct ggml_context * ctx,
struct ggml_tensor * a,
enum ggml_op_pool op,
ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
a->ne[1],
};
- struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
+
+ struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
int32_t params[] = { op, k0, s0, p0 };
    ggml_set_op_params(result, &params, sizeof(params));
// ggml_pool_2d
-struct ggml_tensor* ggml_pool_2d(
+struct ggml_tensor * ggml_pool_2d(
struct ggml_context * ctx,
struct ggml_tensor * a,
enum ggml_op_pool op,
ggml_calc_pool_output_size(a->ne[1], k1, s1, p1),
a->ne[2],
};
- struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
+
+ struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
    ggml_set_op_params(result, &params, sizeof(params));
return ggml_map_binary_impl_f32(ctx, a, b, fun, true);
}
-// ggml_map_custom1
+// ggml_map_custom1_f32
static struct ggml_tensor * ggml_map_custom1_impl_f32(
struct ggml_context * ctx,
ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
- result->op = GGML_OP_MAP_CUSTOM1;
+ result->op = GGML_OP_MAP_CUSTOM1_F32;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a;
return ggml_map_custom1_impl_f32(ctx, a, fun, true);
}
-// ggml_map_custom2
+// ggml_map_custom2_f32
static struct ggml_tensor * ggml_map_custom2_impl_f32(
struct ggml_context * ctx,
ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
- result->op = GGML_OP_MAP_CUSTOM2;
+ result->op = GGML_OP_MAP_CUSTOM2_F32;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a;
result->src[1] = b;
return ggml_map_custom2_impl_f32(ctx, a, b, fun, true);
}
-// ggml_map_custom3
+// ggml_map_custom3_f32
static struct ggml_tensor * ggml_map_custom3_impl_f32(
struct ggml_context * ctx,
ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
- result->op = GGML_OP_MAP_CUSTOM3;
+ result->op = GGML_OP_MAP_CUSTOM3_F32;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a;
result->src[1] = b;
return ggml_map_custom3_impl_f32(ctx, a, b, c, fun, true);
}
+// ggml_map_custom1
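+
+// the user callback, the requested number of tasks and the userdata pointer
+// are stored in the tensor's op_params and read back at compute time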
+struct ggml_map_custom1_op_params {
+ ggml_custom1_op_t fun;
+ int n_tasks;
+ void * userdata;
+};
+
+static struct ggml_tensor * ggml_map_custom1_impl(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ const ggml_custom1_op_t fun,
+ int n_tasks,
+ void * userdata,
+ bool inplace) {
+ GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
+
+ bool is_node = false;
+
+ if (!inplace && a->grad) {
+ is_node = true;
+ }
+
+ struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
+
+ struct ggml_map_custom1_op_params params = {
+ /*.fun =*/ fun,
+ /*.n_tasks =*/ n_tasks,
+ /*.userdata =*/ userdata
+ };
+    ggml_set_op_params(result, (const void *) &params, sizeof(params));
+
+ result->op = GGML_OP_MAP_CUSTOM1;
+ result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
+ result->src[0] = a;
+
+ return result;
+}
+
+struct ggml_tensor * ggml_map_custom1(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ const ggml_custom1_op_t fun,
+ int n_tasks,
+ void * userdata) {
+ return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, false);
+}
+
+struct ggml_tensor * ggml_map_custom1_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ const ggml_custom1_op_t fun,
+ int n_tasks,
+ void * userdata) {
+ return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, true);
+}
+
+// ggml_map_custom2
+
+struct ggml_map_custom2_op_params {
+ ggml_custom2_op_t fun;
+ int n_tasks;
+ void * userdata;
+};
+
+static struct ggml_tensor * ggml_map_custom2_impl(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b,
+ const ggml_custom2_op_t fun,
+ int n_tasks,
+ void * userdata,
+ bool inplace) {
+ GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
+
+ bool is_node = false;
+
+ if (!inplace && (a->grad || b->grad)) {
+ is_node = true;
+ }
+
+ struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
+
+ struct ggml_map_custom2_op_params params = {
+ /*.fun =*/ fun,
+ /*.n_tasks =*/ n_tasks,
+ /*.userdata =*/ userdata
+ };
+    ggml_set_op_params(result, (const void *) &params, sizeof(params));
+
+ result->op = GGML_OP_MAP_CUSTOM2;
+ result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
+ result->src[0] = a;
+ result->src[1] = b;
+
+ return result;
+}
+
+struct ggml_tensor * ggml_map_custom2(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b,
+ const ggml_custom2_op_t fun,
+ int n_tasks,
+ void * userdata) {
+ return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, false);
+}
+
+struct ggml_tensor * ggml_map_custom2_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b,
+ const ggml_custom2_op_t fun,
+ int n_tasks,
+ void * userdata) {
+ return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, true);
+}
+
+// ggml_map_custom3
+
+struct ggml_map_custom3_op_params {
+ ggml_custom3_op_t fun;
+ int n_tasks;
+ void * userdata;
+};
+
+static struct ggml_tensor * ggml_map_custom3_impl(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b,
+ struct ggml_tensor * c,
+ const ggml_custom3_op_t fun,
+ int n_tasks,
+ void * userdata,
+ bool inplace) {
+ GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
+
+ bool is_node = false;
+
+ if (!inplace && (a->grad || b->grad || c->grad)) {
+ is_node = true;
+ }
+
+ struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
+
+ struct ggml_map_custom3_op_params params = {
+ /*.fun =*/ fun,
+ /*.n_tasks =*/ n_tasks,
+ /*.userdata =*/ userdata
+ };
+    ggml_set_op_params(result, (const void *) &params, sizeof(params));
+
+ result->op = GGML_OP_MAP_CUSTOM3;
+ result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
+ result->src[0] = a;
+ result->src[1] = b;
+ result->src[2] = c;
+
+ return result;
+}
+
+struct ggml_tensor * ggml_map_custom3(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b,
+ struct ggml_tensor * c,
+ const ggml_custom3_op_t fun,
+ int n_tasks,
+ void * userdata) {
+ return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, false);
+}
+
+struct ggml_tensor * ggml_map_custom3_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b,
+ struct ggml_tensor * c,
+ const ggml_custom3_op_t fun,
+ int n_tasks,
+ void * userdata) {
+ return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, true);
+}
+
// ggml_cross_entropy_loss
struct ggml_tensor * ggml_cross_entropy_loss(
fun(dst, a);
}
-
-static void ggml_compute_forward_map_custom1(
- const struct ggml_compute_params * params,
- const struct ggml_tensor * a,
- struct ggml_tensor * dst,
- const ggml_custom1_op_f32_t fun) {
- switch (a->type) {
- case GGML_TYPE_F32:
- {
- ggml_compute_forward_map_custom1_f32(params, a, dst, fun);
- } break;
- default:
- {
- GGML_ASSERT(false);
- } break;
- }
-}
-
// ggml_compute_forward_map_custom2
static void ggml_compute_forward_map_custom2_f32(
}
-static void ggml_compute_forward_map_custom2(
- const struct ggml_compute_params * params,
- const struct ggml_tensor * a,
- const struct ggml_tensor * b,
- struct ggml_tensor * dst,
- const ggml_custom2_op_f32_t fun) {
- switch (a->type) {
- case GGML_TYPE_F32:
- {
- ggml_compute_forward_map_custom2_f32(params, a, b, dst, fun);
- } break;
- default:
- {
- GGML_ASSERT(false);
- } break;
- }
-}
-
// ggml_compute_forward_map_custom3
static void ggml_compute_forward_map_custom3_f32(
fun(dst, a, b, c);
}
+// ggml_compute_forward_map_custom1
+
+static void ggml_compute_forward_map_custom1(
+ const struct ggml_compute_params * params,
+ const struct ggml_tensor * a,
+ struct ggml_tensor * dst) {
+ if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ return;
+ }
+
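+    // recover the callback and userdata stored in op_params and let the
+    // callback compute this thread's share of the work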
+ struct ggml_map_custom1_op_params * p = (struct ggml_map_custom1_op_params *) dst->op_params;
+
+ p->fun(dst, a, params->ith, params->nth, p->userdata);
+}
+
+// ggml_compute_forward_map_custom2
+
+static void ggml_compute_forward_map_custom2(
+ const struct ggml_compute_params * params,
+ const struct ggml_tensor * a,
+ const struct ggml_tensor * b,
+ struct ggml_tensor * dst) {
+ if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ return;
+ }
+
+ struct ggml_map_custom2_op_params * p = (struct ggml_map_custom2_op_params *) dst->op_params;
+
+ p->fun(dst, a, b, params->ith, params->nth, p->userdata);
+}
+
+// ggml_compute_forward_map_custom3
+
static void ggml_compute_forward_map_custom3(
const struct ggml_compute_params * params,
const struct ggml_tensor * a,
const struct ggml_tensor * b,
const struct ggml_tensor * c,
- struct ggml_tensor * dst,
- const ggml_custom3_op_f32_t fun) {
- switch (a->type) {
- case GGML_TYPE_F32:
- {
- ggml_compute_forward_map_custom3_f32(params, a, b, c, dst, fun);
- } break;
- default:
- {
- GGML_ASSERT(false);
- } break;
+ struct ggml_tensor * dst) {
+ if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ return;
}
+
+ struct ggml_map_custom3_op_params * p = (struct ggml_map_custom3_op_params *) dst->op_params;
+
+ p->fun(dst, a, b, c, params->ith, params->nth, p->userdata);
}
// ggml_compute_forward_cross_entropy_loss
ggml_compute_forward_map_binary(params, tensor->src[0], tensor->src[1], tensor, fun);
}
break;
- case GGML_OP_MAP_CUSTOM1:
+ case GGML_OP_MAP_CUSTOM1_F32:
{
ggml_custom1_op_f32_t fun;
memcpy(&fun, tensor->op_params, sizeof(fun));
- ggml_compute_forward_map_custom1(params, tensor->src[0], tensor, fun);
+ ggml_compute_forward_map_custom1_f32(params, tensor->src[0], tensor, fun);
}
break;
- case GGML_OP_MAP_CUSTOM2:
+ case GGML_OP_MAP_CUSTOM2_F32:
{
ggml_custom2_op_f32_t fun;
memcpy(&fun, tensor->op_params, sizeof(fun));
- ggml_compute_forward_map_custom2(params, tensor->src[0], tensor->src[1], tensor, fun);
+ ggml_compute_forward_map_custom2_f32(params, tensor->src[0], tensor->src[1], tensor, fun);
}
break;
- case GGML_OP_MAP_CUSTOM3:
+ case GGML_OP_MAP_CUSTOM3_F32:
{
ggml_custom3_op_f32_t fun;
memcpy(&fun, tensor->op_params, sizeof(fun));
- ggml_compute_forward_map_custom3(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor, fun);
+ ggml_compute_forward_map_custom3_f32(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor, fun);
+ }
+ break;
+ case GGML_OP_MAP_CUSTOM1:
+ {
+ ggml_compute_forward_map_custom1(params, tensor->src[0], tensor);
+ }
+ break;
+ case GGML_OP_MAP_CUSTOM2:
+ {
+ ggml_compute_forward_map_custom2(params, tensor->src[0], tensor->src[1], tensor);
+ }
+ break;
+ case GGML_OP_MAP_CUSTOM3:
+ {
+ ggml_compute_forward_map_custom3(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor);
}
break;
case GGML_OP_CROSS_ENTROPY_LOSS:
} break;
case GGML_OP_MAP_UNARY:
case GGML_OP_MAP_BINARY:
+ case GGML_OP_MAP_CUSTOM1_F32:
+ case GGML_OP_MAP_CUSTOM2_F32:
+ case GGML_OP_MAP_CUSTOM3_F32:
case GGML_OP_MAP_CUSTOM1:
case GGML_OP_MAP_CUSTOM2:
case GGML_OP_MAP_CUSTOM3:
case GGML_OP_WIN_UNPART:
case GGML_OP_MAP_UNARY:
case GGML_OP_MAP_BINARY:
+ case GGML_OP_MAP_CUSTOM1_F32:
+ case GGML_OP_MAP_CUSTOM2_F32:
+ case GGML_OP_MAP_CUSTOM3_F32:
+ {
+ n_tasks = 1;
+ } break;
case GGML_OP_MAP_CUSTOM1:
+ {
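+                // the user-requested task count is capped by the number of
+                // threads; GGML_N_TASKS_MAX means "use all of them"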
+ struct ggml_map_custom1_op_params * p = (struct ggml_map_custom1_op_params *) node->op_params;
+ if (p->n_tasks == GGML_N_TASKS_MAX) {
+ n_tasks = n_threads;
+ } else {
+ n_tasks = MIN(p->n_tasks, n_threads);
+ }
+ } break;
case GGML_OP_MAP_CUSTOM2:
+ {
+ struct ggml_map_custom2_op_params * p = (struct ggml_map_custom2_op_params *) node->op_params;
+ if (p->n_tasks == GGML_N_TASKS_MAX) {
+ n_tasks = n_threads;
+ } else {
+ n_tasks = MIN(p->n_tasks, n_threads);
+ }
+ } break;
case GGML_OP_MAP_CUSTOM3:
{
- n_tasks = 1;
+ struct ggml_map_custom3_op_params * p = (struct ggml_map_custom3_op_params *) node->op_params;
+ if (p->n_tasks == GGML_N_TASKS_MAX) {
+ n_tasks = n_threads;
+ } else {
+ n_tasks = MIN(p->n_tasks, n_threads);
+ }
} break;
case GGML_OP_CROSS_ENTROPY_LOSS:
{
--- /dev/null
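+// tests for the ggml_map_custom1/2/3 operators: each callback transforms its
+// inputs and counts how many times it was invoked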
+#include "ggml/ggml.h"
+
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+
+#if defined(_WIN32)
+
+#include <windows.h>
+typedef volatile LONG atomic_int;
+
+static LONG atomic_fetch_add(atomic_int * ptr, LONG inc) {
+ return InterlockedExchangeAdd(ptr, inc);
+}
+
+#else
+#include <stdatomic.h>
+#endif
+
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+
+struct ggml_context * make_ctx(void) {
+ struct ggml_init_params params = {
+ /*.mem_size =*/ 1 * 1024 * 1024,
+ /*.mem_buffer =*/ NULL,
+ /*.no_alloc =*/ false,
+ };
+
+ return ggml_init(params);
+}
+
+char g_userdata[] = "ggml";
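+// invocation counters: each callback increments its counter once per task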
+atomic_int g_custom1_count = 0;
+atomic_int g_custom2_count = 0;
+atomic_int g_custom3_count = 0;
+
+void custom1(struct ggml_tensor * dst, const struct ggml_tensor * a, int ith, int nth, void * userdata) {
+ // check that the userdata is correct
+ assert(userdata == NULL);
+
+ assert(ggml_are_same_shape(dst, a));
+
+ atomic_fetch_add(&g_custom1_count, 1);
+
+ const float * a_data = ggml_get_data_f32(a);
+ float * dst_data = ggml_get_data_f32(dst);
+
+ // parallelize by elements
+ const int ne = ggml_nelements(dst);
+ const int dr = (ne + nth - 1) / nth;
+ const int ie0 = dr * ith;
+ const int ie1 = MIN(ie0 + dr, ne);
+
+ for (int i = ie0; i < ie1; ++i) {
+ dst_data[i] = a_data[i] * 2;
+ }
+}
+
+void custom2(struct ggml_tensor * dst, const struct ggml_tensor * a, const struct ggml_tensor * b, int ith, int nth, void * userdata) {
+ // check that the userdata is correct
+ assert(userdata == g_userdata);
+ assert(strcmp(userdata, "ggml") == 0);
+ assert(ggml_are_same_shape(dst, a));
+ assert(ggml_are_same_shape(dst, b));
+
+ atomic_fetch_add(&g_custom2_count, 1);
+
+ const float * a_data = ggml_get_data_f32(a);
+ const float * b_data = ggml_get_data_f32(b);
+ float * dst_data = ggml_get_data_f32(dst);
+
+ // parallelize by rows
+ const int nr = ggml_nrows(dst);
+ // number of rows per thread
+ const int dr = (nr + nth - 1) / nth;
+ // row range for this thread
+ const int ir0 = dr * ith;
+ const int ir1 = MIN(ir0 + dr, nr);
+
+ // number of columns
+ const int nc = dst->ne[0];
+
+ for (int ir = ir0; ir < ir1; ++ir) {
+ for (int ic = 0; ic < nc; ++ic) {
+ const int i = ir * nc + ic;
+ dst_data[i] = a_data[i] + b_data[i];
+ }
+ }
+}
+
+void custom3(struct ggml_tensor * dst, const struct ggml_tensor * a, const struct ggml_tensor * b, const struct ggml_tensor * c, int ith, int nth, void * userdata) {
+ // check that the userdata is correct
+ assert(userdata == g_userdata);
+ assert(strcmp(userdata, "ggml") == 0);
+ assert(ggml_are_same_shape(dst, a));
+ assert(ggml_are_same_shape(dst, b));
+ assert(ggml_are_same_shape(dst, c));
+
+ atomic_fetch_add(&g_custom3_count, 1);
+
+ const float * a_data = ggml_get_data_f32(a);
+ const float * b_data = ggml_get_data_f32(b);
+ const float * c_data = ggml_get_data_f32(c);
+ float * dst_data = ggml_get_data_f32(dst);
+
+    // don't parallelize
+ assert(ith == 0);
+
+ const int ne = ggml_nelements(dst);
+
+ for (int i = 0; i < ne; ++i) {
+ dst_data[i] = a_data[i] + b_data[i] + c_data[i];
+ }
+}
+
+int main(int argc, const char** argv) {
+
+ float buf1_f32[1024];
+ for (int i = 0; i < 1024; ++i) {
+ buf1_f32[i] = (float)(i + 1);
+ }
+ float buf2_f32[1024];
+ for (int i = 0; i < 1024; ++i) {
+ buf2_f32[i] = (float)(i + 1) * 2;
+ }
+ float buf3_f32[1024];
+ for (int i = 0; i < 1024; ++i) {
+ buf3_f32[i] = (float)(i + 1) * 3;
+ }
+
+ // map_custom1
+ {
+ struct ggml_context * ctx = make_ctx();
+ struct ggml_tensor * t = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 10, 2);
+ memcpy(t->data, buf1_f32, ggml_nbytes(t));
+
+ struct ggml_tensor * m1 = ggml_map_custom1(ctx, t, custom1, 2, NULL);
+
+ struct ggml_cgraph graph = ggml_build_forward(m1);
+
+ ggml_graph_compute_with_ctx(ctx, &graph, 4);
+
+ const float * output = ggml_get_data_f32(m1);
+
+ for (int i = 0; i < ggml_nelements(m1); ++i) {
+ assert(output[i] == buf1_f32[i] * 2);
+ }
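+        // n_tasks was 2, so custom1 must have been invoked exactly twice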
+ assert(g_custom1_count == 2);
+
+ ggml_free(ctx);
+ }
+
+ // map_custom2
+ {
+ struct ggml_context * ctx = make_ctx();
+ struct ggml_tensor * t1 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 10, 2);
+ memcpy(t1->data, buf1_f32, ggml_nbytes(t1));
+ struct ggml_tensor * t2 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 10, 2);
+ memcpy(t2->data, buf2_f32, ggml_nbytes(t2));
+
+ struct ggml_tensor * m2 = ggml_map_custom2(ctx, t1, t2, custom2, GGML_N_TASKS_MAX, g_userdata);
+
+ struct ggml_cgraph graph = ggml_build_forward(m2);
+
+ ggml_graph_compute_with_ctx(ctx, &graph, 4);
+
+ const float * output = ggml_get_data_f32(m2);
+
+ for (int i = 0; i < ggml_nelements(m2); ++i) {
+ assert(output[i] == buf1_f32[i] + buf2_f32[i]);
+ }
+
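+        // GGML_N_TASKS_MAX with 4 threads means custom2 ran once per thread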
+ assert(g_custom2_count == 4);
+
+ ggml_free(ctx);
+ }
+
+ // map_custom3
+ {
+ struct ggml_context * ctx = make_ctx();
+ struct ggml_tensor * t1 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 10, 2);
+ memcpy(t1->data, buf1_f32, ggml_nbytes(t1));
+ struct ggml_tensor * t2 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 10, 2);
+ memcpy(t2->data, buf2_f32, ggml_nbytes(t2));
+ struct ggml_tensor * t3 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 10, 2);
+ memcpy(t3->data, buf3_f32, ggml_nbytes(t3));
+
+ struct ggml_tensor * m3 = ggml_map_custom3(ctx, t1, t2, t3, custom3, 1, g_userdata);
+
+ struct ggml_cgraph graph = ggml_build_forward(m3);
+
+ ggml_graph_compute_with_ctx(ctx, &graph, 4);
+
+ const float * output = ggml_get_data_f32(m3);
+
+ for (int i = 0; i < ggml_nelements(m3); ++i) {
+ assert(output[i] == buf1_f32[i] + buf2_f32[i] + buf3_f32[i]);
+ }
+
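+        // n_tasks was 1, so custom3 ran exactly once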
+ assert(g_custom3_count == 1);
+
+ ggml_free(ctx);
+ }
+
+ return 0;
+}