int d1, // dilation dimension 1
bool is_2D);
- GGML_API struct ggml_tensor * ggml_conv_depthwise_2d(
- struct ggml_context * ctx,
- struct ggml_tensor * a, // convolution kernel
- struct ggml_tensor * b, // data
- int s0, // stride dimension 0
- int s1, // stride dimension 1
- int p0, // padding dimension 0
- int p1, // padding dimension 1
- int d0, // dilation dimension 0
- int d1); // dilation dimension 1
-
GGML_API struct ggml_tensor * ggml_conv_1d(
struct ggml_context * ctx,
struct ggml_tensor * a, // convolution kernel
int s, // stride
int d); // dilation
+ // depthwise 1D convolution of data b with kernel a
+ // TODO: this is very likely wrong for some cases! - needs more testing
+ GGML_API struct ggml_tensor * ggml_conv_1d_dw(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a, // convolution kernel
+ struct ggml_tensor * b, // data
+ int s0, // stride
+ int p0, // padding
+ int d0); // dilation
+
+ GGML_API struct ggml_tensor * ggml_conv_1d_dw_ph( // ggml_conv_1d_dw with the padding fixed to a->ne[0] / 2
+ struct ggml_context * ctx,
+ struct ggml_tensor * a, // convolution kernel
+ struct ggml_tensor * b, // data
+ int s0, // stride
+ int d0); // dilation
+
GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
struct ggml_context * ctx,
struct ggml_tensor * a, // convolution kernel
int d0, // dilation dimension 0
int d1); // dilation dimension 1
-
// kernel size is a->ne[0] x a->ne[1]
// stride is equal to kernel size
// padding is zero
struct ggml_tensor * a,
struct ggml_tensor * b);
+ // depthwise 2D convolution of data b with kernel a
+ GGML_API struct ggml_tensor * ggml_conv_2d_dw(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a, // convolution kernel
+ struct ggml_tensor * b, // data
+ int s0, // stride dimension 0
+ int s1, // stride dimension 1
+ int p0, // padding dimension 0
+ int p1, // padding dimension 1
+ int d0, // dilation dimension 0
+ int d1); // dilation dimension 1
+
GGML_API struct ggml_tensor * ggml_conv_transpose_2d_p0(
struct ggml_context * ctx,
struct ggml_tensor * a,
return result;
}
-// ggml_conv_1d
-
// Number of output positions along one axis of a convolution.
//   ins: input size, ks: kernel size, s: stride, p: padding (counted twice, once per side), d: dilation
// The division truncates toward zero, exactly like the plain C `/` operator.
static int64_t ggml_calc_conv_output_size(int64_t ins, int64_t ks, int s, int p, int d) {
    const int64_t padded_len  = ins + 2 * p;       // input extent including padding
    const int64_t kernel_span = d * (ks - 1) + 1;  // extent covered by the dilated kernel
    const int64_t free_travel = padded_len - kernel_span;

    return free_travel / s + 1;
}
-GGML_API struct ggml_tensor * ggml_conv_1d(
+// im2col: [N, IC, IH, IW] => [N, OH, OW, IC*KH*KW]
+// a: [OC,IC, KH, KW] (kernel; this function reads only a->ne[] and records a as src[0])
+// b: [N, IC, IH, IW] (data; recorded as src[1])
+// result: [N, OH, OW, IC*KH*KW], a new tensor of type dst_type with op GGML_OP_IM2COL
+struct ggml_tensor * ggml_im2col(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b,
+ int s0, // stride along ne[0]
+ int s1, // stride along ne[1] (used for sizing only when is_2D)
+ int p0, // padding along ne[0]
+ int p1, // padding along ne[1] (used for sizing only when is_2D)
+ int d0, // dilation along ne[0]
+ int d1, // dilation along ne[1] (used for sizing only when is_2D)
+ bool is_2D, // true: 2D unfolding, false: 1D unfolding
+ enum ggml_type dst_type) { // element type of the result tensor
+ if (is_2D) {
+ GGML_ASSERT(a->ne[2] == b->ne[2]); // kernel and data must agree on ne[2] (IC in the layout above)
+ } else {
+ //GGML_ASSERT(b->ne[1] % a->ne[1] == 0);
+ GGML_ASSERT(b->ne[1] == a->ne[1]); // 1D: kernel and data must agree on ne[1]
+ GGML_ASSERT(b->ne[3] == 1); // 1D: b->ne[3] must be 1
+ }
+
+ const int64_t OH = is_2D ? ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1) : 0; // output height; 0 in 1D mode
+ const int64_t OW = ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0); // output width
+
+ GGML_ASSERT((!is_2D || OH > 0) && "b too small compared to a");
+ GGML_ASSERT((OW > 0) && "b too small compared to a");
+
+ const int64_t ne[4] = {
+ is_2D ? (a->ne[2] * a->ne[1] * a->ne[0]) : a->ne[1] * a->ne[0], // ne[0]: one unfolded patch
+ OW,
+ is_2D ? OH : b->ne[2],
+ is_2D ? b->ne[3] : 1,
+ };
+
+ struct ggml_tensor * result = ggml_new_tensor(ctx, dst_type, 4, ne);
+ int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) }; // stored on the node in this order
+ ggml_set_op_params(result, params, sizeof(params));
+
+ result->op = GGML_OP_IM2COL;
+ result->src[0] = a;
+ result->src[1] = b;
+
+ return result;
+}
+
+struct ggml_tensor * ggml_im2col_back( // builds a GGML_OP_IM2COL_BACK node with sources a and b
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b,
+ int64_t * ne, // sizes of the 4-dimensional result tensor, supplied by the caller
+ int s0,
+ int s1,
+ int p0,
+ int p1,
+ int d0,
+ int d1,
+ bool is_2D) {
+ struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne); // result is always F32
+ int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) }; // same order as the params stored by ggml_im2col
+ ggml_set_op_params(result, params, sizeof(params));
+
+ result->op = GGML_OP_IM2COL_BACK;
+ result->src[0] = a;
+ result->src[1] = b;
+
+ return result;
+}
+
+// ggml_conv_1d
+
+struct ggml_tensor * ggml_conv_1d(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
return ggml_conv_1d(ctx, a, b, s, a->ne[0] / 2, d);
}
-// ggml_conv_transpose_1d
-
-static int64_t ggml_calc_conv_transpose_1d_output_size(int64_t ins, int64_t ks, int s, int p, int d) {
- return (ins - 1) * s - 2 * p + d * (ks - 1) + 1;
-}
+// ggml_conv_1d_dw
+// depthwise 1D convolution: a is the kernel, b is the data, s0/p0/d0 are stride/padding/dilation
-GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
+struct ggml_tensor * ggml_conv_1d_dw(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
int s0,
int p0,
int d0) {
- GGML_ASSERT(ggml_is_matrix(b));
- GGML_ASSERT(a->ne[2] == b->ne[1]);
- GGML_ASSERT(a->ne[3] == 1);
+ // insert a singleton ne[1] in both tensors so ggml_im2col's 1D check (b->ne[1] == a->ne[1]) holds
+ struct ggml_tensor * new_a = ggml_reshape_4d(ctx, a, a->ne[0], 1, a->ne[1], a->ne[2]);
+ struct ggml_tensor * new_b = ggml_reshape_4d(ctx, b, b->ne[0], 1, b->ne[1], b->ne[2]);
- GGML_ASSERT(p0 == 0);
- GGML_ASSERT(d0 == 1);
+ // 1D mode: s1/p1/d1 are passed as 0; ggml_im2col only sizes OH from them when is_2D is true
+ struct ggml_tensor * im2col = ggml_im2col(ctx, new_a, new_b, s0, 0, p0, 0, d0, 0, false, GGML_TYPE_F16);
- const int64_t ne[4] = {
- ggml_calc_conv_transpose_1d_output_size(b->ne[0], a->ne[0], s0, 0 /*p0*/, 1 /*d0*/),
- a->ne[1], b->ne[2], 1,
- };
- struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
+ struct ggml_tensor * result = ggml_mul_mat(ctx, im2col, a);
- int32_t params[] = { s0, p0, d0 };
- ggml_set_op_params(result, params, sizeof(params));
-
- result->op = GGML_OP_CONV_TRANSPOSE_1D;
- result->src[0] = a;
- result->src[1] = b;
+ // Take the output length and channel count from the product itself. Reshaping to
+ // b->ne[0] x b->ne[1] only has the right element count when the convolution keeps the
+ // input length (OW == b->ne[0]); any other stride/padding/dilation failed the reshape.
+ // NOTE(review): assumes a->ne[1] == 1 so that result->ne[1] is a singleton - confirm.
+ result = ggml_reshape_3d(ctx, result, result->ne[0], result->ne[2], 1);
return result;
}
-// ggml_conv_depthwise
+// ggml_conv_1d_dw_ph: ggml_conv_1d_dw with the padding fixed to a->ne[0] / 2
-struct ggml_tensor * ggml_conv_depthwise_2d(
+struct ggml_tensor * ggml_conv_1d_dw_ph(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
int s0,
- int s1,
- int p0,
- int p1,
- int d0,
- int d1) {
- struct ggml_tensor * new_a = ggml_reshape_4d(ctx, a, a->ne[0], a->ne[1], 1, a->ne[2] * a->ne[3]);
- struct ggml_tensor * im2col = ggml_im2col(ctx, new_a,
- ggml_reshape_4d(ctx, b, b->ne[0], b->ne[1], 1, b->ne[2] * b->ne[3]),
- s0, s1, p0, p1, d0, d1, true, GGML_TYPE_F16); // [N * IC, OH, OW, KH * KW]
- struct ggml_tensor * new_b = ggml_reshape_4d(ctx, im2col, im2col->ne[0], im2col->ne[2] * im2col->ne[1], b->ne[2], b->ne[3]); // [N * IC, OH, OW, KH * KW] => [N, IC, OH * OW, KH * KW]
+ int d0) {
+ return ggml_conv_1d_dw(ctx, a, b, s0, a->ne[0] / 2, d0); // padding = a->ne[0] / 2 (integer division)
+}
- new_a = ggml_reshape_4d(ctx, new_a, (new_a->ne[0] * new_a->ne[1]), new_a->ne[2], new_a->ne[3], 1); // [OC,1, KH, KW] => [1, OC, 1, KH * KW]
- struct ggml_tensor * result = ggml_mul_mat(ctx, new_a, new_b);
- result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], b->ne[2], b->ne[3]); // [N, OC, OH, OW]
+// ggml_conv_transpose_1d
- return result;
+static int64_t ggml_calc_conv_transpose_1d_output_size(int64_t ins, int64_t ks, int s, int p, int d) { // output length of a 1D transposed convolution
+ return (ins - 1) * s - 2 * p + d * (ks - 1) + 1; // ins: input length, ks: kernel size, s: stride, p: padding, d: dilation
}
-// ggml_conv_2d
-// im2col: [N, IC, IH, IW] => [N, OH, OW, IC*KH*KW]
-// a: [OC,IC, KH, KW]
-// b: [N, IC, IH, IW]
-// result: [N, OH, OW, IC*KH*KW]
-struct ggml_tensor * ggml_im2col(
+GGML_API struct ggml_tensor * ggml_conv_transpose_1d( // builds a GGML_OP_CONV_TRANSPOSE_1D node with sources a and b
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
int s0,
- int s1,
int p0,
- int p1,
- int d0,
- int d1,
- bool is_2D,
- enum ggml_type dst_type) {
- if(is_2D) {
- GGML_ASSERT(a->ne[2] == b->ne[2]);
- } else {
- GGML_ASSERT(a->ne[1] == b->ne[1]);
- GGML_ASSERT(b->ne[3] == 1);
- }
-
- const int64_t OH = is_2D ? ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1) : 0;
- const int64_t OW = ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
+ int d0) {
+ GGML_ASSERT(ggml_is_matrix(b)); // data must pass ggml_is_matrix
+ GGML_ASSERT(a->ne[2] == b->ne[1]); // kernel ne[2] must equal data ne[1]
+ GGML_ASSERT(a->ne[3] == 1); // kernel must not use ne[3]
- GGML_ASSERT((!is_2D || OH > 0) && "b too small compared to a");
- GGML_ASSERT((OW > 0) && "b too small compared to a");
+ GGML_ASSERT(p0 == 0); // only zero padding is accepted
+ GGML_ASSERT(d0 == 1); // only dilation 1 is accepted
const int64_t ne[4] = {
- is_2D ? (a->ne[2] * a->ne[1] * a->ne[0]) : a->ne[1] * a->ne[0],
- OW,
- is_2D ? OH : b->ne[2],
- is_2D ? b->ne[3] : 1,
+ ggml_calc_conv_transpose_1d_output_size(b->ne[0], a->ne[0], s0, 0 /*p0*/, 1 /*d0*/), // output length; padding 0 and dilation 1 are hard-coded, matching the asserts above
+ a->ne[1], b->ne[2], 1,
};
+ struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne); // result is always F32
- struct ggml_tensor * result = ggml_new_tensor(ctx, dst_type, 4, ne);
- int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
+ int32_t params[] = { s0, p0, d0 }; // stored on the node in this order
ggml_set_op_params(result, params, sizeof(params));
- result->op = GGML_OP_IM2COL;
+ result->op = GGML_OP_CONV_TRANSPOSE_1D;
result->src[0] = a;
result->src[1] = b;
return result;
}
-struct ggml_tensor * ggml_im2col_back(
- struct ggml_context * ctx,
- struct ggml_tensor * a,
- struct ggml_tensor * b,
- int64_t * ne,
- int s0,
- int s1,
- int p0,
- int p1,
- int d0,
- int d1,
- bool is_2D) {
- struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
- int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
- ggml_set_op_params(result, params, sizeof(params));
-
- result->op = GGML_OP_IM2COL_BACK;
- result->src[0] = a;
- result->src[1] = b;
-
- return result;
-}
+// ggml_conv_2d
// a: [OC,IC, KH, KW]
// b: [N, IC, IH, IW]
return ggml_conv_2d(ctx, a, b, 1, 1, a->ne[0] / 2, a->ne[1] / 2, 1, 1);
}
+// ggml_conv_2d_dw: depthwise 2D convolution built from ggml_im2col followed by ggml_mul_mat
+
+struct ggml_tensor * ggml_conv_2d_dw(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a, // convolution kernel
+ struct ggml_tensor * b, // data
+ int s0, // stride dimension 0
+ int s1, // stride dimension 1
+ int p0, // padding dimension 0
+ int p1, // padding dimension 1
+ int d0, // dilation dimension 0
+ int d1) { // dilation dimension 1
+ struct ggml_tensor * new_a = ggml_reshape_4d(ctx, a, a->ne[0], a->ne[1], 1, a->ne[2] * a->ne[3]); // fold ne[2] and ne[3] into ne[3], leaving ne[2] == 1
+ struct ggml_tensor * im2col = ggml_im2col(ctx, new_a,
+ ggml_reshape_4d(ctx, b, b->ne[0], b->ne[1], 1, b->ne[2] * b->ne[3]), // same folding for the data, so ggml_im2col's 2D check a->ne[2] == b->ne[2] holds (1 == 1)
+ s0, s1, p0, p1, d0, d1, true, GGML_TYPE_F16); // [N * IC, OH, OW, KH * KW]
+ struct ggml_tensor * new_b = ggml_reshape_4d(ctx, im2col, im2col->ne[0], im2col->ne[2] * im2col->ne[1], b->ne[2], b->ne[3]); // [N * IC, OH, OW, KH * KW] => [N, IC, OH * OW, KH * KW]
+
+ new_a = ggml_reshape_4d(ctx, new_a, (new_a->ne[0] * new_a->ne[1]), new_a->ne[2], new_a->ne[3], 1); // [OC,1, KH, KW] => [1, OC, 1, KH * KW]
+ struct ggml_tensor * result = ggml_mul_mat(ctx, new_a, new_b);
+ result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], b->ne[2], b->ne[3]); // [N, OC, OH, OW]
+
+ return result;
+}
+
// ggml_conv_transpose_2d_p0
static int64_t ggml_calc_conv_transpose_output_size(int64_t ins, int64_t ks, int s, int p) {