"FLASH_ATTN",
"FLASH_FF",
+
+ "MAP_UNARY",
+ "MAP_BINARY",
};
-static_assert(GGML_OP_COUNT == 36, "GGML_OP_COUNT != 36");
+static_assert(GGML_OP_COUNT == 38, "GGML_OP_COUNT != 38");
static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
"none",
"flash_attn(x)",
"flash_ff(x)",
+
+ "f(x)",
+ "f(x,y)",
};
-static_assert(GGML_OP_COUNT == 36, "GGML_OP_COUNT != 36");
+static_assert(GGML_OP_COUNT == 38, "GGML_OP_COUNT != 38");
static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN");
static_assert(sizeof(struct ggml_tensor)%GGML_MEM_ALIGN == 0, "ggml_tensor size must be a multiple of GGML_MEM_ALIGN");
return NULL;
}
+ const size_t mem_size = (params.mem_size + GGML_MEM_ALIGN - 1) & ~(GGML_MEM_ALIGN - 1);
+
*ctx = (struct ggml_context) {
- /*.mem_size =*/ params.mem_size,
- /*.mem_buffer =*/ params.mem_buffer ? params.mem_buffer : GGML_ALIGNED_MALLOC(params.mem_size),
+ /*.mem_size =*/ mem_size,
+ /*.mem_buffer =*/ params.mem_buffer ? params.mem_buffer : GGML_ALIGNED_MALLOC(mem_size),
/*.mem_buffer_owned =*/ params.mem_buffer ? false : true,
/*.no_alloc =*/ params.no_alloc,
/*.n_objects =*/ 0,
/*.scratch_save =*/ { 0, 0, NULL, },
};
- GGML_ASSERT(ctx->mem_buffer != NULL); // check for allocation failure
+ GGML_ASSERT(ctx->mem_buffer != NULL);
ggml_assert_aligned(ctx->mem_buffer);
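
For reference, a minimal sketch of the round-up idiom used for mem_size above, assuming GGML_MEM_ALIGN is a power of two (16 in ggml at the time of this change); the helper name round_up_to_align is purely illustrative:

    // (x + a - 1) & ~(a - 1) rounds x up to the next multiple of a when a is a power of two,
    // e.g. with a = 16: 1000 -> 1008, while 1024 stays 1024.
    static size_t round_up_to_align(size_t x, size_t a) {
        return (x + a - 1) & ~(a - 1);
    }
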
return result;
}
+// ggml_map_unary
+
+struct ggml_tensor * ggml_map_unary_impl_f32(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ const ggml_unary_op_f32_t fun,
+ bool inplace) {
+ bool is_node = false;
+
+ if (!inplace && a->grad) {
+ is_node = true;
+ }
+
+ // stash the callback address in a small I32 tensor so the forward pass can recover it from opt[0]
+ struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
+ *((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
+
+ struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
+
+ result->op = GGML_OP_MAP_UNARY;
+ result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
+ result->src0 = a;
+ result->opt[0] = addr_tensor;
+
+ return result;
+}
+
+struct ggml_tensor * ggml_map_unary_f32(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ const ggml_unary_op_f32_t fun) {
+ return ggml_map_unary_impl_f32(ctx, a, fun, false);
+}
+
+struct ggml_tensor * ggml_map_unary_inplace_f32(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ const ggml_unary_op_f32_t fun) {
+ return ggml_map_unary_impl_f32(ctx, a, fun, true);
+}
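
A minimal usage sketch of the new unary mapping API (not part of this change). It assumes ctx and an F32 tensor x already exist, and that ggml_unary_op_f32_t is declared in ggml.h as void (*)(const int, float *, const float *), matching the fun(nc, dst, src) calls made in the forward pass further down:

    // element-wise square; invoked once per row with n = ne[0] elements
    static void square_f32(const int n, float * dst, const float * src) {
        for (int i = 0; i < n; i++) {
            dst[i] = src[i] * src[i];
        }
    }

    // struct ggml_tensor * y = ggml_map_unary_f32(ctx, x, square_f32);
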
+
+// ggml_map_binary
+
+struct ggml_tensor * ggml_map_binary_impl_f32(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b,
+ const ggml_binary_op_f32_t fun,
+ bool inplace) {
+ GGML_ASSERT(ggml_are_same_shape(a, b));
+
+ bool is_node = false;
+
+ if (!inplace && (a->grad || b->grad)) {
+ is_node = true;
+ }
+
+ // stash the callback address in a small I32 tensor so the forward pass can recover it from opt[0]
+ struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
+ *((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
+
+ struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
+
+ result->op = GGML_OP_MAP_BINARY;
+ result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
+ result->src0 = a;
+ result->src1 = b;
+ result->opt[0] = addr_tensor;
+
+ return result;
+}
+
+struct ggml_tensor * ggml_map_binary_f32(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b,
+ const ggml_binary_op_f32_t fun) {
+ return ggml_map_binary_impl_f32(ctx, a, b, fun, false);
+}
+
+struct ggml_tensor * ggml_map_binary_inplace_f32(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b,
+ const ggml_binary_op_f32_t fun) {
+ return ggml_map_binary_impl_f32(ctx, a, b, fun, true);
+}
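
Likewise for the binary variant, a sketch assuming two F32 tensors x and y of the same shape (the shape requirement is enforced by the GGML_ASSERT above) and a ggml_binary_op_f32_t of the form void (*)(const int, float *, const float *, const float *):

    // element-wise product; invoked once per row with n = ne[0] elements
    static void mul_f32(const int n, float * dst, const float * src0, const float * src1) {
        for (int i = 0; i < n; i++) {
            dst[i] = src0[i] * src1[i];
        }
    }

    // struct ggml_tensor * z = ggml_map_binary_f32(ctx, x, y, mul_f32);
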
+
////////////////////////////////////////////////////////////////////////////////
void ggml_set_param(
// row index used to determine which thread to use
int ir = 0;
+ const float theta_scale = powf(10000.0, -2.0f/n_dims);
+
for (int64_t i3 = 0; i3 < ne3; i3++) {
for (int64_t i2 = (mode == 0 ? 0 : n_past); i2 < ne2; i2++) {
const int p = (mode == 0 ? n_past + i2 : i2);
if (ir++ < ir0) continue;
if (ir > ir1) break;
+ float theta = (float)p;
+
for (int i0 = 0; i0 < n_dims; i0 += 2) {
- const float theta = powf(10000.0, ((float)-i0)/n_dims);
- const float cos_theta = cosf(p*theta);
- const float sin_theta = sinf(p*theta);
+ const float cos_theta = cosf(theta);
+ const float sin_theta = sinf(theta);
+
+ theta *= theta_scale;
const float * const src = (float *)((char *) src0->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0);
float * dst_data = (float *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0);
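
The incremental update above is mathematically equivalent to the per-element powf it replaces (up to floating-point rounding): theta starts at p, and at the iteration where i0 = 2k it equals p * theta_scale^k = p * 10000^(-2k/n_dims) = p * 10000^(-i0/n_dims), i.e. exactly the p*theta of the old code, computed with one multiply per pair instead of a powf call. The same rewrite is applied to the F16 path below.
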
// row index used to determine which thread to use
int ir = 0;
+ const float theta_scale = powf(10000.0, -2.0f/n_dims);
+
for (int64_t i3 = 0; i3 < ne3; i3++) {
for (int64_t i2 = (mode == 0 ? 0 : n_past); i2 < ne2; i2++) {
const int p = (mode == 0 ? n_past + i2 : i2);
if (ir++ < ir0) continue;
if (ir > ir1) break;
+ float theta = (float)p;
+
for (int i0 = 0; i0 < n_dims; i0 += 2) {
- const float theta = powf(10000.0, ((float)-i0)/n_dims);
- const float cos_theta = cosf(p*theta);
- const float sin_theta = sinf(p*theta);
+ const float cos_theta = cosf(theta);
+ const float sin_theta = sinf(theta);
+
+ theta *= theta_scale;
const ggml_fp16_t * const src = (ggml_fp16_t *)((char *) src0->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0);
ggml_fp16_t * dst_data = (ggml_fp16_t *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0);
}
}
+// ggml_compute_forward_map_unary
+
+static void ggml_compute_forward_map_unary_f32(
+ const struct ggml_compute_params * params,
+ const struct ggml_tensor * src0,
+ struct ggml_tensor * dst,
+ const ggml_unary_op_f32_t fun) {
+ GGML_ASSERT(ggml_are_same_shape(src0, dst));
+
+ if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ return;
+ }
+
+ const int n = ggml_nrows(src0);
+ const int nc = src0->ne[0];
+
+ assert( dst->nb[0] == sizeof(float));
+ assert(src0->nb[0] == sizeof(float));
+
+ for (int i = 0; i < n; i++) {
+ fun(nc,
+ (float *) ((char *) dst->data + i*( dst->nb[1])),
+ (float *) ((char *) src0->data + i*(src0->nb[1])));
+ }
+}
+
+static void ggml_compute_forward_map_unary(
+ const struct ggml_compute_params * params,
+ const struct ggml_tensor * src0,
+ struct ggml_tensor * dst,
+ const ggml_unary_op_f32_t fun) {
+ switch (src0->type) {
+ case GGML_TYPE_F32:
+ {
+ ggml_compute_forward_map_unary_f32(params, src0, dst, fun);
+ } break;
+ case GGML_TYPE_Q4_0:
+ case GGML_TYPE_Q4_1:
+ case GGML_TYPE_I8:
+ case GGML_TYPE_I16:
+ case GGML_TYPE_I32:
+ case GGML_TYPE_F16:
+ case GGML_TYPE_COUNT:
+ {
+ GGML_ASSERT(false);
+ } break;
+ }
+}
+
+// ggml_compute_forward_map_binary
+
+static void ggml_compute_forward_map_binary_f32(
+ const struct ggml_compute_params * params,
+ const struct ggml_tensor * src0,
+ const struct ggml_tensor * src1,
+ struct ggml_tensor * dst,
+ const ggml_binary_op_f32_t fun) {
+ assert(params->ith == 0);
+ assert(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
+
+ if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ return;
+ }
+
+ const int n = ggml_nrows(src0);
+ const int nc = src0->ne[0];
+
+ assert( dst->nb[0] == sizeof(float));
+ assert(src0->nb[0] == sizeof(float));
+ assert(src1->nb[0] == sizeof(float));
+
+ for (int i = 0; i < n; i++) {
+ fun(nc,
+ (float *) ((char *) dst->data + i*( dst->nb[1])),
+ (float *) ((char *) src0->data + i*(src0->nb[1])),
+ (float *) ((char *) src1->data + i*(src1->nb[1])));
+ }
+}
+
+static void ggml_compute_forward_map_binary(
+ const struct ggml_compute_params * params,
+ const struct ggml_tensor * src0,
+ const struct ggml_tensor * src1,
+ struct ggml_tensor * dst,
+ const ggml_binary_op_f32_t fun) {
+ switch (src0->type) {
+ case GGML_TYPE_F32:
+ {
+ ggml_compute_forward_map_binary_f32(params, src0, src1, dst, fun);
+ } break;
+ case GGML_TYPE_Q4_0:
+ case GGML_TYPE_Q4_1:
+ case GGML_TYPE_I8:
+ case GGML_TYPE_I16:
+ case GGML_TYPE_I32:
+ case GGML_TYPE_F16:
+ case GGML_TYPE_COUNT:
+ {
+ GGML_ASSERT(false);
+ } break;
+ }
+}
+
/////////////////////////////////
static void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
{
ggml_compute_forward_flash_ff(params, tensor->src0, tensor->src1, tensor->opt[0], tensor->opt[1], tensor->opt[2], tensor);
} break;
+ case GGML_OP_MAP_UNARY:
+ {
+ const ggml_unary_op_f32_t fun = *((ggml_unary_op_f32_t *)tensor->opt[0]->data);
+ ggml_compute_forward_map_unary(params, tensor->src0, tensor, fun);
+ } break;
+ case GGML_OP_MAP_BINARY:
+ {
+ const ggml_binary_op_f32_t fun = *((ggml_binary_op_f32_t *)tensor->opt[0]->data);
+ ggml_compute_forward_map_binary(params, tensor->src0, tensor->src1, tensor, fun);
+ } break;
case GGML_OP_NONE:
{
// nop
{
GGML_ASSERT(false); // not supported
} break;
+ case GGML_OP_MAP_UNARY:
+ case GGML_OP_MAP_BINARY:
+ {
+ GGML_ASSERT(false); // not supported
+ } break;
case GGML_OP_NONE:
{
// nop
work_size = MAX(work_size, cur);
} break;
+ case GGML_OP_MAP_UNARY:
+ case GGML_OP_MAP_BINARY:
+ {
+ node->n_tasks = 1;
+ } break;
case GGML_OP_NONE:
{
node->n_tasks = 1;