From: Borislav Stanimirov Date: Wed, 12 Jul 2023 10:43:30 +0000 (+0300) Subject: ggml : basic implementation of 1d and 2d pools (#375) X-Git-Tag: upstream/0.0.1642~1326 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=25b2d4ebac9fe718fb0ab1b773a9f59c0bbd0033;p=pkg%2Fggml%2Fsources%2Fggml ggml : basic implementation of 1d and 2d pools (#375) pools and tests no lenet --- diff --git a/include/ggml/ggml.h b/include/ggml/ggml.h index 8fe05d3a..3e9c06a6 100644 --- a/include/ggml/ggml.h +++ b/include/ggml/ggml.h @@ -368,6 +368,8 @@ extern "C" { GGML_OP_CLAMP, GGML_OP_CONV_1D, GGML_OP_CONV_2D, + GGML_OP_POOL_1D, + GGML_OP_POOL_2D, GGML_OP_FLASH_ATTN, GGML_OP_FLASH_FF, @@ -1173,6 +1175,31 @@ extern "C" { int s, int d); + enum ggml_pool_op { + GGML_POOL_MAX, + GGML_POOL_AVG, + GGML_NUM_POOL_OPS + }; + + GGML_API struct ggml_tensor* ggml_pool_1d( + struct ggml_context * ctx, + struct ggml_tensor * a, + enum ggml_pool_op op, + int k0, // kernel size + int s0, // stride + int p0); // padding + + GGML_API struct ggml_tensor* ggml_pool_2d( + struct ggml_context * ctx, + struct ggml_tensor * a, + enum ggml_pool_op op, + int k0, + int k1, + int s0, + int s1, + int p0, + int p1); + GGML_API struct ggml_tensor * ggml_flash_attn( struct ggml_context * ctx, struct ggml_tensor * q, diff --git a/src/ggml.c b/src/ggml.c index 793ff709..32054a46 100644 --- a/src/ggml.c +++ b/src/ggml.c @@ -3787,6 +3787,8 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = { "CLAMP", "CONV_1D", "CONV_2D", + "POOL_1D", + "POOL_2D", "FLASH_ATTN", "FLASH_FF", @@ -3805,7 +3807,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = { "CROSS_ENTROPY_LOSS_BACK", }; -static_assert(GGML_OP_COUNT == 66, "GGML_OP_COUNT != 66"); +static_assert(GGML_OP_COUNT == 68, "GGML_OP_COUNT != 68"); static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = { "none", @@ -3865,6 +3867,8 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = { "clamp(x)", "conv_1d(x)", "conv_2d(x)", + "pool_1d(x)", + "pool_2d(x)", "flash_attn(x)", 
"flash_ff(x)", @@ -3883,7 +3887,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = { "cross_entropy_loss_back(x,y)", }; -static_assert(GGML_OP_COUNT == 66, "GGML_OP_COUNT != 66"); +static_assert(GGML_OP_COUNT == 68, "GGML_OP_COUNT != 68"); static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN"); static_assert(sizeof(struct ggml_tensor)%GGML_MEM_ALIGN == 0, "ggml_tensor size must be a multiple of GGML_MEM_ALIGN"); @@ -7214,6 +7218,98 @@ struct ggml_tensor* ggml_conv_1d_ph( return ggml_conv_1d(ctx, a, b, s, a->ne[0] / 2, d); } + +// ggml_pool_* + +static int64_t ggml_calc_pool_output_size(int64_t ins, int ks, int s, int p) { + return (ins + 2 * p - ks) / s + 1; +} + +// ggml_pool_1d + +struct ggml_tensor* ggml_pool_1d( + struct ggml_context * ctx, + struct ggml_tensor * a, + enum ggml_pool_op op, + int k0, + int s0, + int p0) { + + bool is_node = false; + + if (a->grad) { + GGML_ASSERT(false); // TODO: implement backward + is_node = true; + } + + const int64_t ne[3] = { + ggml_calc_pool_output_size(a->ne[0], k0, s0, p0), + a->ne[1], + }; + struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne); + + ggml_scratch_save(ctx); + struct ggml_tensor* c = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 4); + ((int32_t*)c->data)[0] = op; + ((int32_t*)c->data)[1] = k0; + ((int32_t*)c->data)[2] = s0; + ((int32_t*)c->data)[3] = p0; + ggml_scratch_load(ctx); + + result->op = GGML_OP_POOL_1D; + result->grad = is_node ? 
ggml_dup_tensor(ctx, result) : NULL; + result->src[0] = a; + result->src[1] = c; + + return result; +} + +// ggml_pool_2d + +struct ggml_tensor* ggml_pool_2d( + struct ggml_context * ctx, + struct ggml_tensor * a, + enum ggml_pool_op op, + int k0, + int k1, + int s0, + int s1, + int p0, + int p1) { + + bool is_node = false; + + if (a->grad) { + GGML_ASSERT(false); // TODO: implement backward + is_node = true; + } + + const int64_t ne[3] = { + ggml_calc_pool_output_size(a->ne[0], k0, s0, p0), + ggml_calc_pool_output_size(a->ne[1], k1, s1, p1), + a->ne[2], + }; + struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne); + + ggml_scratch_save(ctx); + struct ggml_tensor* c = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 7); + ((int32_t*)c->data)[0] = op; + ((int32_t*)c->data)[1] = k0; + ((int32_t*)c->data)[2] = k1; + ((int32_t*)c->data)[3] = s0; + ((int32_t*)c->data)[4] = s1; + ((int32_t*)c->data)[5] = p0; + ((int32_t*)c->data)[6] = p1; + ggml_scratch_load(ctx); + + result->op = GGML_OP_POOL_2D; + result->grad = is_node ? 
ggml_dup_tensor(ctx, result) : NULL; + result->src[0] = a; + result->src[1] = c; + + return result; +} + // ggml_flash_attn struct ggml_tensor * ggml_flash_attn( @@ -13013,6 +13109,174 @@ static void ggml_compute_forward_conv_2d( }; } +// ggml_compute_forward_pool_1d_sk_p0 + +static void ggml_compute_forward_pool_1d_sk_p0( + const struct ggml_compute_params * params, + const enum ggml_pool_op op, + const struct ggml_tensor * src, + const int k, + struct ggml_tensor * dst) { + assert(src->type == GGML_TYPE_F32); + assert(params->ith == 0); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + const char* cdata = (const char*)src->data; + const char* const data_end = cdata + ggml_nbytes(src); + float* drow = (float*)dst->data; + const int64_t rs = dst->ne[0]; + while (cdata < data_end) { + const float* const srow = (const float*)cdata; + + int j = 0; + static_assert(GGML_NUM_POOL_OPS == 2, "GGML_NUM_POOL_OPS != 2"); + for (int64_t i = 0; i < rs; ++i) { + switch (op) { + case GGML_POOL_AVG: + drow[i] = 0; + break; + case GGML_POOL_MAX: + drow[i] = -FLT_MAX; + break; + } + for (int ki = 0; ki < k; ++ki) { + switch (op) { + case GGML_POOL_AVG: + drow[i] += srow[j]; + break; + case GGML_POOL_MAX: + if (srow[j] > drow[i]) drow[i] = srow[j]; + break; + } + ++j; + } + switch (op) { + case GGML_POOL_AVG: + drow[i] /= k; + break; + } + } + + cdata += src->nb[1]; + drow += rs; + } +} + +// ggml_compute_forward_pool_1d + +static void ggml_compute_forward_pool_1d( + const struct ggml_compute_params* params, + const struct ggml_tensor* src0, + const struct ggml_tensor* opt0, + struct ggml_tensor* dst) { + GGML_ASSERT(opt0->ne[0] == 4); + const int* opts = (const int*)opt0->data; + enum ggml_pool_op op = opts[0]; + const int k0 = opts[1]; + const int s0 = opts[2]; + const int p0 = opts[3]; + GGML_ASSERT(p0 == 0); // padding not supported + GGML_ASSERT(k0 == s0); // only s = k supported + + ggml_compute_forward_pool_1d_sk_p0(params, op, 
src0, k0, dst); +} + +// ggml_compute_forward_pool_2d_sk_p0 + +static void ggml_compute_forward_pool_2d_sk_p0( + const struct ggml_compute_params * params, + const enum ggml_pool_op op, + const struct ggml_tensor * src, + const int k0, + const int k1, + struct ggml_tensor * dst) { + assert(src->type == GGML_TYPE_F32); + assert(params->ith == 0); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + const char* cdata = (const char*)src->data; + const char* const data_end = cdata + ggml_nbytes(src); + + const int64_t px = dst->ne[0]; + const int64_t py = dst->ne[1]; + const int64_t pa = px * py; + float* dplane = (float*)dst->data; + + const int ka = k0 * k1; + + while (cdata < data_end) { + static_assert(GGML_NUM_POOL_OPS == 2, "GGML_NUM_POOL_OPS != 2"); + for (int oy = 0; oy < py; ++oy) { + float * const drow = dplane + oy * px; + for (int ox = 0; ox < px; ++ox) { + float * const out = drow + ox; + switch (op) { + case GGML_POOL_AVG: + *out = 0; + break; + case GGML_POOL_MAX: + *out = -FLT_MAX; + break; + } + const int ix = ox * k0; + const int iy = oy * k1; + for (int ky = 0; ky < k1; ++ky) { + const float* const srow = (const float*)(cdata + src->nb[1] * (iy + ky)); + for (int kx = 0; kx < k0; ++kx) { + int j = ix + kx; + switch (op) { + case GGML_POOL_AVG: + *out += srow[j]; + break; + case GGML_POOL_MAX: + if (srow[j] > *out) *out = srow[j]; + break; + } + } + } + switch (op) { + case GGML_POOL_AVG: + *out /= ka; + break; + } + } + } + + cdata += src->nb[2]; + dplane += pa; + } +} + +// ggml_compute_forward_pool_2d + +static void ggml_compute_forward_pool_2d( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * opt0, + struct ggml_tensor * dst) { + GGML_ASSERT(opt0->ne[0] == 7); + const int* opts = (const int*)opt0->data; + enum ggml_pool_op op = opts[0]; + const int k0 = opts[1]; + const int k1 = opts[2]; + const int s0 = opts[3]; + const int s1 = opts[4]; + 
const int p0 = opts[5]; + const int p1 = opts[6]; + GGML_ASSERT(p0 == 0); + GGML_ASSERT(p1 == 0); // padding not supported + GGML_ASSERT(k0 == s0); + GGML_ASSERT(k1 == s1); // only s = k supported + + ggml_compute_forward_pool_2d_sk_p0(params, op, src0, k0, k1, dst); +} + // ggml_compute_forward_flash_attn @@ -14794,6 +15058,14 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm { ggml_compute_forward_conv_2d(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor); } break; + case GGML_OP_POOL_1D: + { + ggml_compute_forward_pool_1d(params, tensor->src[0], tensor->src[1], tensor); + } break; + case GGML_OP_POOL_2D: + { + ggml_compute_forward_pool_2d(params, tensor->src[0], tensor->src[1], tensor); + } break; case GGML_OP_FLASH_ATTN: { const int32_t t = ggml_get_i32_1d(tensor->src[3], 0); @@ -15494,6 +15766,14 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor { GGML_ASSERT(false); // TODO: not implemented } break; + case GGML_OP_POOL_1D: + { + GGML_ASSERT(false); // TODO: not implemented + } break; + case GGML_OP_POOL_2D: + { + GGML_ASSERT(false); // TODO: not implemented + } break; case GGML_OP_FLASH_ATTN: { struct ggml_tensor * flash_grad = NULL; @@ -16315,6 +16595,11 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) { work_size = MAX(work_size, cur); } break; + case GGML_OP_POOL_1D: + case GGML_OP_POOL_2D: + { + n_tasks = 1; + } break; case GGML_OP_FLASH_ATTN: { n_tasks = n_threads; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 89c2aa6d..e787e307 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -256,6 +256,14 @@ add_executable(${TEST_TARGET} ${TEST_TARGET}.c) target_link_libraries(${TEST_TARGET} PRIVATE ggml) add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}>) +# +# test-pool + +set(TEST_TARGET test-pool) +add_executable(${TEST_TARGET} ${TEST_TARGET}.c) +target_link_libraries(${TEST_TARGET} PRIVATE ggml) +add_test(NAME ${TEST_TARGET} COMMAND 
$<TARGET_FILE:${TEST_TARGET}>) + # # test-svd0 (arm/x86) diff --git a/tests/test-pool.c b/tests/test-pool.c new file mode 100644 index 00000000..14f831f1 --- /dev/null +++ b/tests/test-pool.c @@ -0,0 +1,142 @@ +#include "ggml/ggml.h" + +#include <string.h> +#include <stdio.h> +#include <stdlib.h> + +struct ggml_context* make_ctx(void) { + struct ggml_init_params params = { + .mem_size = 2 * 1024 * 1024, + }; + + return ggml_init(params); +} + +int main(int argc, const char** argv) { + + float buf_f32[1024]; + for (int i = 0; i < 1024; ++i) { + buf_f32[i] = (float)(i + 1); + } + + // avg pool 1d + { + struct ggml_context* ctx = make_ctx(); + struct ggml_tensor* t = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 10, 2); + memcpy(t->data, buf_f32, ggml_nbytes(t)); + + struct ggml_tensor* t_pooled = ggml_pool_1d(ctx, t, GGML_POOL_AVG, 3, 3, 0); + GGML_ASSERT(t_pooled->ne[0] == 3); + GGML_ASSERT(t_pooled->ne[1] == 2); + GGML_ASSERT(t_pooled->ne[2] == 1); + + struct ggml_cgraph graph = ggml_build_forward(t_pooled); + + ggml_graph_compute_with_ctx(ctx, &graph, 4); + + const float* output = ggml_get_data_f32(t_pooled); + GGML_ASSERT(output[0] == 2); + GGML_ASSERT(output[1] == 5); + GGML_ASSERT(output[2] == 8); + GGML_ASSERT(output[3] == 12); + GGML_ASSERT(output[4] == 15); + GGML_ASSERT(output[5] == 18); + + ggml_free(ctx); + } + + // max pool 1d + { + struct ggml_context* ctx = make_ctx(); + struct ggml_tensor* t = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 10, 2); + memcpy(t->data, buf_f32, ggml_nbytes(t)); + + struct ggml_tensor* t_pooled = ggml_pool_1d(ctx, t, GGML_POOL_MAX, 3, 3, 0); + GGML_ASSERT(t_pooled->ne[0] == 3); + GGML_ASSERT(t_pooled->ne[1] == 2); + GGML_ASSERT(t_pooled->ne[2] == 1); + + struct ggml_cgraph graph = ggml_build_forward(t_pooled); + + ggml_graph_compute_with_ctx(ctx, &graph, 4); + + const float* output = ggml_get_data_f32(t_pooled); + GGML_ASSERT(output[0] == 3); + GGML_ASSERT(output[1] == 6); + GGML_ASSERT(output[2] == 9); + GGML_ASSERT(output[3] == 13); + GGML_ASSERT(output[4] == 16); + GGML_ASSERT(output[5] == 
19); + + ggml_free(ctx); + } + + // avg pool 2d + { + struct ggml_context* ctx = make_ctx(); + struct ggml_tensor* t = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 10, 10, 2); + memcpy(t->data, buf_f32, ggml_nbytes(t)); + + struct ggml_tensor* t_pooled = ggml_pool_2d(ctx, t, GGML_POOL_AVG, 3, 4, 3, 4, 0, 0); + GGML_ASSERT(t_pooled->ne[0] == 3); + GGML_ASSERT(t_pooled->ne[1] == 2); + GGML_ASSERT(t_pooled->ne[2] == 2); + GGML_ASSERT(t_pooled->ne[3] == 1); + + struct ggml_cgraph graph = ggml_build_forward(t_pooled); + + ggml_graph_compute_with_ctx(ctx, &graph, 4); + + const float* output = ggml_get_data_f32(t_pooled); + GGML_ASSERT(output[0] == 17); + GGML_ASSERT(output[1] == 20); + GGML_ASSERT(output[2] == 23); + GGML_ASSERT(output[3] == 57); + GGML_ASSERT(output[4] == 60); + GGML_ASSERT(output[5] == 63); + GGML_ASSERT(output[6] == 117); + GGML_ASSERT(output[7] == 120); + GGML_ASSERT(output[8] == 123); + GGML_ASSERT(output[9] == 157); + GGML_ASSERT(output[10] == 160); + GGML_ASSERT(output[11] == 163); + + + ggml_free(ctx); + } + + // max pool 2d + { + struct ggml_context* ctx = make_ctx(); + struct ggml_tensor* t = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 10, 10, 2); + memcpy(t->data, buf_f32, ggml_nbytes(t)); + + struct ggml_tensor* t_pooled = ggml_pool_2d(ctx, t, GGML_POOL_MAX, 3, 4, 3, 4, 0, 0); + GGML_ASSERT(t_pooled->ne[0] == 3); + GGML_ASSERT(t_pooled->ne[1] == 2); + GGML_ASSERT(t_pooled->ne[2] == 2); + GGML_ASSERT(t_pooled->ne[3] == 1); + + struct ggml_cgraph graph = ggml_build_forward(t_pooled); + + ggml_graph_compute_with_ctx(ctx, &graph, 4); + + const float* output = ggml_get_data_f32(t_pooled); + GGML_ASSERT(output[0] == 33); + GGML_ASSERT(output[1] == 36); + GGML_ASSERT(output[2] == 39); + GGML_ASSERT(output[3] == 73); + GGML_ASSERT(output[4] == 76); + GGML_ASSERT(output[5] == 79); + GGML_ASSERT(output[6] == 133); + GGML_ASSERT(output[7] == 136); + GGML_ASSERT(output[8] == 139); + GGML_ASSERT(output[9] == 173); + GGML_ASSERT(output[10] == 176); + 
GGML_ASSERT(output[11] == 179); + + ggml_free(ctx); + } + + return 0; +}