"ARGMAX",
"REPEAT",
"REPEAT_BACK",
+ "CONCAT",
"SILU_BACK",
"NORM",
"RMS_NORM",
"RMS_NORM_BACK",
+ "GROUP_NORM",
"MUL_MAT",
"OUT_PROD",
"CONV_TRANSPOSE_2D",
"POOL_1D",
"POOL_2D",
+ "UPSCALE",
"FLASH_ATTN",
"FLASH_FF",
"CROSS_ENTROPY_LOSS_BACK",
};
-static_assert(GGML_OP_COUNT == 65, "GGML_OP_COUNT != 65");
+static_assert(GGML_OP_COUNT == 68, "GGML_OP_COUNT != 68");
static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
"none",
"argmax(x)",
"repeat(x)",
"repeat_back(x)",
+ "concat(x, y)",
"silu_back(x)",
"norm(x)",
"rms_norm(x)",
"rms_norm_back(x)",
+ "group_norm(x)",
"X*Y",
"X*Y",
"conv_transpose_2d(x)",
"pool_1d(x)",
"pool_2d(x)",
+ "upscale(x)",
"flash_attn(x)",
"flash_ff(x)",
"cross_entropy_loss_back(x,y)",
};
-static_assert(GGML_OP_COUNT == 65, "GGML_OP_COUNT != 65");
+static_assert(GGML_OP_COUNT == 68, "GGML_OP_COUNT != 68");
static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
return result;
}
+// ggml_concat
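+// concatenate a and b along dim 2 (ne[2]); ne[0], ne[1] and ne[3] of a and b must match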
+
+struct ggml_tensor * ggml_concat(
+    struct ggml_context * ctx,
+    struct ggml_tensor * a,
+    struct ggml_tensor * b) {
+ GGML_ASSERT(a->ne[0] == b->ne[0] && a->ne[1] == b->ne[1] && a->ne[3] == b->ne[3]);
+
+ bool is_node = false;
+
+ if (a->grad || b->grad) {
+ is_node = true;
+ }
+
+ struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, a->ne[0], a->ne[1], a->ne[2] + b->ne[2], a->ne[3]);
+
+ result->op = GGML_OP_CONCAT;
+ result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
+ result->src[0] = a;
+ result->src[1] = b;
+
+ return result;
+}
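+
+// usage sketch (illustrative): stack two feature maps along the channel dim
+//   struct ggml_tensor * ab = ggml_concat(ctx, a, b); // ab->ne[2] == a->ne[2] + b->ne[2]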
+
// ggml_abs
struct ggml_tensor * ggml_abs(
return ggml_norm_impl(ctx, a, true);
}
+// ggml_rms_norm
+
static struct ggml_tensor * ggml_rms_norm_impl(
struct ggml_context * ctx,
struct ggml_tensor * a,
return ggml_rms_norm_impl(ctx, a, eps, true);
}
+// ggml_rms_norm_back
+
struct ggml_tensor * ggml_rms_norm_back(
struct ggml_context * ctx,
struct ggml_tensor * a,
return result;
}
+// ggml_group_norm
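+// group normalization: split the ne[2] channels into n_groups groups and normalize each
+// group to zero mean and unit variance (the forward pass currently hard-codes eps = 1e-6)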
+
+static struct ggml_tensor * ggml_group_norm_impl(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ int n_groups,
+ bool inplace) {
+
+ bool is_node = false;
+ if (!inplace && (a->grad)) {
+ GGML_ASSERT(false); // TODO: implement backward
+ is_node = true;
+ }
+
+ struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
+
+ result->op = GGML_OP_GROUP_NORM;
+ result->op_params[0] = n_groups;
+ result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
+ result->src[0] = a;
+ result->src[1] = NULL; // TODO: maybe store epsilon here?
+
+ return result;
+}
+
+struct ggml_tensor * ggml_group_norm(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ int n_groups) {
+ return ggml_group_norm_impl(ctx, a, n_groups, false);
+}
+
+struct ggml_tensor * ggml_group_norm_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ int n_groups) {
+ return ggml_group_norm_impl(ctx, a, n_groups, true);
+}
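+
+// usage sketch (illustrative): normalize x in groups of channels
+//   struct ggml_tensor * y = ggml_group_norm(ctx, x, 32); // 32 groups along x->ne[2]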
// ggml_mul_mat
return result;
}
+// ggml_upscale
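+// nearest-neighbor upscaling: dims 0 and 1 are scaled by an integer factor, dims 2 and 3
+// are unchanged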
+
+static struct ggml_tensor * ggml_upscale_impl(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ int scale_factor) {
+ bool is_node = false;
+
+ if (a->grad) {
+ GGML_ASSERT(false); // TODO: implement backward
+ is_node = true;
+ }
+
+ struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type,
+ a->ne[0] * scale_factor,
+ a->ne[1] * scale_factor,
+ a->ne[2], a->ne[3]);
+
+ result->op = GGML_OP_UPSCALE;
+ result->op_params[0] = scale_factor;
+ result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
+ result->src[0] = a;
+ result->src[1] = NULL;
+
+ return result;
+}
+
+struct ggml_tensor * ggml_upscale(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ int scale_factor) {
+ return ggml_upscale_impl(ctx, a, scale_factor);
+}
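+
+// usage sketch (illustrative): double the spatial resolution of a feature map
+//   struct ggml_tensor * y = ggml_upscale(ctx, x, 2); // y->ne[0] == 2*x->ne[0], y->ne[1] == 2*x->ne[1]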
+
// ggml_flash_attn
struct ggml_tensor * ggml_flash_attn(
}
}
+// ggml_compute_forward_concat
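+// dst slices with i2 < ne02 come from src0; slices with i2 >= ne02 come from src1 (offset by ne02 along dim 2)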
+
+static void ggml_compute_forward_concat_f32(
+ const struct ggml_compute_params * params,
+ const struct ggml_tensor * src0,
+ const struct ggml_tensor * src1,
+ struct ggml_tensor * dst) {
+
+ if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ return;
+ }
+
+ GGML_ASSERT(src0->nb[0] == sizeof(float));
+
+    const int ith = params->ith;
+    const int nth = params->nth;
+
+ GGML_TENSOR_BINARY_OP_LOCALS;
+
+ // TODO: support for transposed / permuted tensors
+ GGML_ASSERT(nb0 == sizeof(float));
+ GGML_ASSERT(nb00 == sizeof(float));
+ GGML_ASSERT(nb10 == sizeof(float));
+
+ for (int i3 = 0; i3 < ne3; i3++) {
+        for (int i2 = ith; i2 < ne2; i2 += nth) { // split work across threads along dim 2
+ if (i2 < ne02) { // src0
+ for (int i1 = 0; i1 < ne1; i1++) {
+ for (int i0 = 0; i0 < ne0; i0++) {
+ const float * x = (float *)((char *) src0->data + i0 * nb00 + i1 * nb01 + i2 * nb02 + i3 * nb03);
+
+ float * y = (float *)((char *)dst->data + i0 * nb0 + i1 * nb1 + i2 * nb2 + i3 * nb3);
+ *y = *x;
+ }
+ }
+ } // src1
+ else {
+ for (int i1 = 0; i1 < ne1; i1++) {
+ for (int i0 = 0; i0 < ne0; i0++) {
+ const float * x = (float *)((char *) src1->data + i0 * nb10 + i1 * nb11 + (i2 - ne02) * nb12 + i3 * nb13);
+
+ float * y = (float *)((char *)dst->data + i0 * nb0 + i1 * nb1 + i2 * nb2 + i3 * nb3);
+ *y = *x;
+ }
+ }
+ }
+ }
+ }
+}
+
+static void ggml_compute_forward_concat(
+    const struct ggml_compute_params * params,
+    const struct ggml_tensor * src0,
+    const struct ggml_tensor * src1,
+    struct ggml_tensor * dst) {
+ switch (src0->type) {
+ case GGML_TYPE_F32:
+ {
+ ggml_compute_forward_concat_f32(params, src0, src1, dst);
+ } break;
+ default:
+ {
+ GGML_ASSERT(false);
+ } break;
+ }
+}
+
// ggml_compute_forward_abs
static void ggml_compute_forward_abs_f32(
}
}
+// ggml_compute_forward_rms_norm
+
static void ggml_compute_forward_rms_norm_f32(
const struct ggml_compute_params * params,
const struct ggml_tensor * src0,
}
}
-
static void ggml_compute_forward_rms_norm_back_f32(
const struct ggml_compute_params * params,
const struct ggml_tensor * src0,
}
}
+// ggml_compute_forward_group_norm
+
+static void ggml_compute_forward_group_norm_f32(
+ const struct ggml_compute_params * params,
+ const struct ggml_tensor * src0,
+ struct ggml_tensor * dst) {
+ GGML_ASSERT(ggml_are_same_shape(src0, dst));
+
+ if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ return;
+ }
+
+ GGML_ASSERT(src0->nb[0] == sizeof(float));
+
+ const int ith = params->ith;
+ const int nth = params->nth;
+
+ GGML_TENSOR_UNARY_OP_LOCALS;
+
+ const float eps = 1e-6f; // TODO: make this a parameter
+
+ // TODO: optimize
+
+ int n_channels = src0->ne[2];
+ int n_groups = dst->op_params[0];
+ int n_channels_per_group = (n_channels + n_groups - 1) / n_groups;
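+    // each thread processes every nth group; per group: one pass to accumulate the mean,
+    // one pass to center the values and accumulate the variance, then scale by 1/sqrt(var + eps)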
+ for (int i = ith; i < n_groups; i+=nth) {
+ int start = i * n_channels_per_group;
+ int end = start + n_channels_per_group;
+ if (end > n_channels) {
+ end = n_channels;
+ }
+ int step = end - start;
+
+ for (int64_t i03 = 0; i03 < ne03; i03++) {
+ ggml_float sum = 0.0;
+ for (int64_t i02 = start; i02 < end; i02++) {
+ for (int64_t i01 = 0; i01 < ne01; i01++) {
+ const float * x = (float *)((char *) src0->data + i01 * nb01 + i02 * nb02 + i03 * nb03);
+
+ for (int64_t i00 = 0; i00 < ne00; i00++) {
+ sum += (ggml_float)x[i00];
+ }
+ }
+ }
+ float mean = sum / (ne00 * ne01 * step);
+ ggml_float sum2 = 0.0;
+
+ for (int64_t i02 = start; i02 < end; i02++) {
+ for (int64_t i01 = 0; i01 < ne01; i01++) {
+ const float * x = (float *)((char *) src0->data + i01 * nb01 + i02 * nb02 + i03 * nb03);
+
+ float * y = (float *)((char *) dst->data + i01 * nb1 + i02 * nb2 + i03 * nb3);
+
+ for (int64_t i00 = 0; i00 < ne00; i00++) {
+ float v = x[i00] - mean;
+ y[i00] = v;
+ sum2 += (ggml_float)(v * v);
+ }
+ }
+ }
+ float variance = sum2 / (ne00 * ne01 * step);
+ const float scale = 1.0f / sqrtf(variance + eps);
+
+ for (int64_t i02 = start; i02 < end; i02++) {
+ for (int64_t i01 = 0; i01 < ne01; i01++) {
+ float * y = (float *)((char *) dst->data + i01 * nb1 + i02 * nb2 + i03 * nb3);
+ ggml_vec_scale_f32(ne00, y, scale);
+ }
+ }
+ }
+ }
+}
+
+static void ggml_compute_forward_group_norm(
+ const struct ggml_compute_params * params,
+ const struct ggml_tensor * src0,
+ struct ggml_tensor * dst) {
+ switch (src0->type) {
+ case GGML_TYPE_F32:
+ {
+ ggml_compute_forward_group_norm_f32(params, src0, dst);
+ } break;
+ default:
+ {
+ GGML_ASSERT(false);
+ } break;
+ }
+}
+
// ggml_compute_forward_mul_mat
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
ggml_compute_forward_pool_2d_sk_p0(params, op, src0, k0, k1, dst);
}
+// ggml_compute_forward_upscale
+
+static void ggml_compute_forward_upscale_f32(
+ const struct ggml_compute_params * params,
+ const struct ggml_tensor * src0,
+ struct ggml_tensor * dst) {
+
+ if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ return;
+ }
+
+ GGML_ASSERT(src0->nb[0] == sizeof(float));
+
+    const int ith = params->ith;
+    const int nth = params->nth;
+
+ GGML_TENSOR_UNARY_OP_LOCALS;
+
+ const int scale_factor = dst->op_params[0];
+
+ // TODO: optimize
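+    // each dst element (n, m) reads src element (n / scale_factor, m / scale_factor)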
+
+ for (int i03 = 0; i03 < ne03; i03++) {
+        for (int i02 = ith; i02 < ne02; i02 += nth) { // split work across threads along dim 2
+ for (int m = 0; m < dst->ne[1]; m++) {
+ int i01 = m / scale_factor;
+ for (int n = 0; n < dst->ne[0]; n++) {
+ int i00 = n / scale_factor;
+
+                    const float * x = (float *)((char *) src0->data + i00 * nb00 + i01 * nb01 + i02 * nb02 + i03 * nb03);
+
+ float * y = (float *)((char *) dst->data + n * dst->nb[0] + m * dst->nb[1] + i02 * dst->nb[2] + i03 * dst->nb[3]);
+
+ *y = *x;
+ }
+ }
+ }
+ }
+}
+
+static void ggml_compute_forward_upscale(
+ const struct ggml_compute_params * params,
+ const struct ggml_tensor * src0,
+ struct ggml_tensor * dst) {
+ switch (src0->type) {
+ case GGML_TYPE_F32:
+ {
+ ggml_compute_forward_upscale_f32(params, src0, dst);
+ } break;
+ default:
+ {
+ GGML_ASSERT(false);
+ } break;
+ }
+}
+
// ggml_compute_forward_flash_attn
static void ggml_compute_forward_flash_attn_f32(
{
ggml_compute_forward_repeat_back(params, tensor->src[0], tensor);
} break;
+ case GGML_OP_CONCAT:
+ {
+ ggml_compute_forward_concat(params, tensor->src[0], tensor->src[1], tensor);
+ } break;
case GGML_OP_SILU_BACK:
{
ggml_compute_forward_silu_back(params, tensor->src[0], tensor->src[1], tensor);
{
ggml_compute_forward_rms_norm_back(params, tensor->src[0], tensor->src[1], tensor);
} break;
+ case GGML_OP_GROUP_NORM:
+ {
+ ggml_compute_forward_group_norm(params, tensor->src[0], tensor);
+ } break;
case GGML_OP_MUL_MAT:
{
ggml_compute_forward_mul_mat(params, tensor->src[0], tensor->src[1], tensor);
{
ggml_compute_forward_pool_2d(params, tensor->src[0], tensor);
} break;
+ case GGML_OP_UPSCALE:
+ {
+ ggml_compute_forward_upscale(params, tensor->src[0], tensor);
+ } break;
case GGML_OP_FLASH_ATTN:
{
const int32_t t = ggml_get_op_params_i32(tensor, 0);
inplace);
}
} break;
+ case GGML_OP_CONCAT:
+ {
+ GGML_ASSERT(false); // TODO: implement
+ } break;
case GGML_OP_SILU_BACK:
{
GGML_ASSERT(false); // TODO: not implemented
{
GGML_ASSERT(false); // TODO: not implemented
} break;
+ case GGML_OP_GROUP_NORM:
+ {
+ GGML_ASSERT(false); // TODO: not implemented
+ } break;
case GGML_OP_MUL_MAT:
{
// https://cs231n.github.io/optimization-2/#staged
{
GGML_ASSERT(false); // TODO: not implemented
} break;
+ case GGML_OP_UPSCALE:
+ {
+ GGML_ASSERT(false); // TODO: not implemented
+ } break;
case GGML_OP_FLASH_ATTN:
{
struct ggml_tensor * flash_grad = NULL;
case GGML_OP_NORM:
case GGML_OP_RMS_NORM:
case GGML_OP_RMS_NORM_BACK:
+ case GGML_OP_GROUP_NORM:
{
n_tasks = n_threads;
} break;
+ case GGML_OP_CONCAT:
case GGML_OP_MUL_MAT:
case GGML_OP_OUT_PROD:
{
{
n_tasks = 1;
} break;
+ case GGML_OP_UPSCALE:
+ {
+ n_tasks = n_threads;
+ } break;
case GGML_OP_FLASH_ATTN:
{
n_tasks = n_threads;