From: Borislav Stanimirov <redacted>
Date: Wed, 12 Jul 2023 10:43:30 +0000 (+0300)
Subject: ggml : basic implementation of 1d and 2d pools (#375)
X-Git-Tag: upstream/0.0.1642~1326
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=25b2d4ebac9fe718fb0ab1b773a9f59c0bbd0033;p=pkg%2Fggml%2Fsources%2Fggml

ggml : basic implementation of 1d and 2d pools (#375)

pools and tests

no lenet
---

diff --git a/include/ggml/ggml.h b/include/ggml/ggml.h
index 8fe05d3a..3e9c06a6 100644
--- a/include/ggml/ggml.h
+++ b/include/ggml/ggml.h
@@ -368,6 +368,8 @@ extern "C" {
         GGML_OP_CLAMP,
         GGML_OP_CONV_1D,
         GGML_OP_CONV_2D,
+        GGML_OP_POOL_1D,
+        GGML_OP_POOL_2D,
 
         GGML_OP_FLASH_ATTN,
         GGML_OP_FLASH_FF,
@@ -1173,6 +1175,31 @@ extern "C" {
             int                   s,
             int                   d);
 
+    enum ggml_pool_op {
+        GGML_POOL_MAX,
+        GGML_POOL_AVG,
+        GGML_NUM_POOL_OPS
+    };
+
+    GGML_API struct ggml_tensor* ggml_pool_1d(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            enum ggml_pool_op     op,
+            int                   k0, // kernel size
+            int                   s0, // stride
+            int                   p0); // padding
+
+    GGML_API struct ggml_tensor* ggml_pool_2d(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            enum ggml_pool_op     op,
+            int                   k0,
+            int                   k1,
+            int                   s0,
+            int                   s1,
+            int                   p0,
+            int                   p1);
+
     GGML_API struct ggml_tensor * ggml_flash_attn(
             struct ggml_context * ctx,
             struct ggml_tensor  * q,
diff --git a/src/ggml.c b/src/ggml.c
index 793ff709..32054a46 100644
--- a/src/ggml.c
+++ b/src/ggml.c
@@ -3787,6 +3787,8 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "CLAMP",
     "CONV_1D",
     "CONV_2D",
+    "POOL_1D",
+    "POOL_2D",
 
     "FLASH_ATTN",
     "FLASH_FF",
@@ -3805,7 +3807,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "CROSS_ENTROPY_LOSS_BACK",
 };
 
-static_assert(GGML_OP_COUNT == 66, "GGML_OP_COUNT != 66");
+static_assert(GGML_OP_COUNT == 68, "GGML_OP_COUNT != 68");
 
 static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "none",
@@ -3865,6 +3867,8 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "clamp(x)",
     "conv_1d(x)",
     "conv_2d(x)",
+    "pool_1d(x)",
+    "pool_2d(x)",
 
     "flash_attn(x)",
     "flash_ff(x)",
@@ -3883,7 +3887,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "cross_entropy_loss_back(x,y)",
 };
 
-static_assert(GGML_OP_COUNT == 66, "GGML_OP_COUNT != 66");
+static_assert(GGML_OP_COUNT == 68, "GGML_OP_COUNT != 68");
 
 static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN");
 static_assert(sizeof(struct ggml_tensor)%GGML_MEM_ALIGN == 0, "ggml_tensor size must be a multiple of GGML_MEM_ALIGN");
@@ -7214,6 +7218,98 @@ struct ggml_tensor* ggml_conv_1d_ph(
     return ggml_conv_1d(ctx, a, b, s, a->ne[0] / 2, d);
 }
 
+
+// ggml_pool_*
+
+static int64_t ggml_calc_pool_output_size(int64_t ins, int ks, int s, int p) {
+    return (ins + 2 * p - ks) / s + 1;
+}
+
+// ggml_pool_2d
+
+struct ggml_tensor* ggml_pool_1d(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        enum ggml_pool_op     op,
+        int                   k0,
+        int                   s0,
+        int                   p0) {
+
+    bool is_node = false;
+
+    if (a->grad) {
+        GGML_ASSERT(false); // TODO: implement backward
+        is_node = true;
+    }
+
+    const int64_t ne[3] = {
+        ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
+        a->ne[1],
+    };
+    struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
+
+    ggml_scratch_save(ctx);
+    struct ggml_tensor* c = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 4);
+    ((int32_t*)c->data)[0] = op;
+    ((int32_t*)c->data)[1] = k0;
+    ((int32_t*)c->data)[2] = s0;
+    ((int32_t*)c->data)[3] = p0;
+    ggml_scratch_load(ctx);
+
+    result->op = GGML_OP_POOL_1D;
+    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
+    result->src[0] = a;
+    result->src[1] = c;
+
+    return result;
+}
+
+// ggml_pool_2d
+
+struct ggml_tensor* ggml_pool_2d(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        enum ggml_pool_op     op,
+        int                   k0,
+        int                   k1,
+        int                   s0,
+        int                   s1,
+        int                   p0,
+        int                   p1) {
+
+    bool is_node = false;
+
+    if (a->grad) {
+        GGML_ASSERT(false); // TODO: implement backward
+        is_node = true;
+    }
+
+    const int64_t ne[3] = {
+        ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
+        ggml_calc_pool_output_size(a->ne[1], k1, s1, p1),
+        a->ne[2],
+    };
+    struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
+
+    ggml_scratch_save(ctx);
+    struct ggml_tensor* c = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 7);
+    ((int32_t*)c->data)[0] = op;
+    ((int32_t*)c->data)[1] = k0;
+    ((int32_t*)c->data)[2] = k1;
+    ((int32_t*)c->data)[3] = s0;
+    ((int32_t*)c->data)[4] = s1;
+    ((int32_t*)c->data)[5] = p0;
+    ((int32_t*)c->data)[6] = p1;
+    ggml_scratch_load(ctx);
+
+    result->op = GGML_OP_POOL_2D;
+    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
+    result->src[0] = a;
+    result->src[1] = c;
+
+    return result;
+}
+
 // ggml_flash_attn
 
 struct ggml_tensor * ggml_flash_attn(
@@ -13013,6 +13109,174 @@ static void ggml_compute_forward_conv_2d(
     };
 }
 
+// ggml_compute_forward_pool_1d_sk_p0
+
+static void ggml_compute_forward_pool_1d_sk_p0(
+        const struct ggml_compute_params * params,
+        const enum ggml_pool_op op,
+        const struct ggml_tensor * src,
+        const int k,
+        struct ggml_tensor * dst) {
+    assert(src->type == GGML_TYPE_F32);
+    assert(params->ith == 0);
+
+    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+        return;
+    }
+
+    const char* cdata = (const char*)src->data;
+    const char* const data_end = cdata + ggml_nbytes(src);
+    float* drow = (float*)dst->data;
+    const int64_t rs = dst->ne[0];
+    while (cdata < data_end) {
+        const float* const srow = (const float*)cdata;
+
+        int j = 0;
+        static_assert(GGML_NUM_POOL_OPS == 2, "GGML_NUM_POOL_OPS != 2");
+        for (int64_t i = 0; i < rs; ++i) {
+            switch (op) {
+            case GGML_POOL_AVG:
+                drow[i] = 0;
+                break;
+            case GGML_POOL_MAX:
+                drow[i] = -FLT_MAX;
+                break;
+            }
+            for (int ki = 0; ki < k; ++ki) {
+                switch (op) {
+                case GGML_POOL_AVG:
+                    drow[i] += srow[j];
+                    break;
+                case GGML_POOL_MAX:
+                    if (srow[j] > drow[i]) drow[i] = srow[j];
+                    break;
+                }
+                ++j;
+            }
+            switch (op) {
+            case GGML_POOL_AVG:
+                drow[i] /= k;
+                break;
+            }
+        }
+
+        cdata += src->nb[1];
+        drow += rs;
+    }
+}
+
+// ggml_compute_forward_pool_1d
+
+static void ggml_compute_forward_pool_1d(
+    const struct ggml_compute_params* params,
+    const struct ggml_tensor* src0,
+    const struct ggml_tensor* opt0,
+    struct ggml_tensor* dst) {
+    GGML_ASSERT(opt0->ne[0] == 4);
+    const int* opts = (const int*)opt0->data;
+    enum ggml_pool_op op = opts[0];
+    const int k0 = opts[1];
+    const int s0 = opts[2];
+    const int p0 = opts[3];
+    GGML_ASSERT(p0 == 0); // padding not supported
+    GGML_ASSERT(k0 == s0); // only s = k supported
+
+    ggml_compute_forward_pool_1d_sk_p0(params, op, src0, k0, dst);
+}
+
+// ggml_compute_forward_pool_2d_sk_p0
+
+static void ggml_compute_forward_pool_2d_sk_p0(
+    const struct ggml_compute_params * params,
+    const enum ggml_pool_op op,
+    const struct ggml_tensor * src,
+    const int k0,
+    const int k1,
+    struct ggml_tensor * dst) {
+    assert(src->type == GGML_TYPE_F32);
+    assert(params->ith == 0);
+
+    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+        return;
+    }
+
+    const char* cdata = (const char*)src->data;
+    const char* const data_end = cdata + ggml_nbytes(src);
+
+    const int64_t px = dst->ne[0];
+    const int64_t py = dst->ne[1];
+    const int64_t pa = px * py;
+    float* dplane = (float*)dst->data;
+
+    const int ka = k0 * k1;
+
+    while (cdata < data_end) {
+        static_assert(GGML_NUM_POOL_OPS == 2, "GGML_NUM_POOL_OPS != 2");
+        for (int oy = 0; oy < py; ++oy) {
+            float * const drow = dplane + oy * px;
+            for (int ox = 0; ox < px; ++ox) {
+                float * const out =  drow + ox;
+                switch (op) {
+                case GGML_POOL_AVG:
+                    *out = 0;
+                    break;
+                case GGML_POOL_MAX:
+                    *out = -FLT_MAX;
+                    break;
+                }
+                const int ix = ox * k0;
+                const int iy = oy * k1;
+                for (int ky = 0; ky < k1; ++ky) {
+                    const float* const srow = (const float*)(cdata + src->nb[1] * (iy + ky));
+                    for (int kx = 0; kx < k0; ++kx) {
+                        int j = ix + kx;
+                        switch (op) {
+                        case GGML_POOL_AVG:
+                            *out += srow[j];
+                            break;
+                        case GGML_POOL_MAX:
+                            if (srow[j] > *out) *out = srow[j];
+                            break;
+                        }
+                    }
+                }
+                switch (op) {
+                case GGML_POOL_AVG:
+                    *out /= ka;
+                    break;
+                }
+            }
+        }
+
+        cdata += src->nb[2];
+        dplane += pa;
+    }
+}
+
+// ggml_compute_forward_pool_2d
+
+static void ggml_compute_forward_pool_2d(
+    const struct ggml_compute_params * params,
+    const struct ggml_tensor * src0,
+    const struct ggml_tensor * opt0,
+    struct ggml_tensor * dst) {
+    GGML_ASSERT(opt0->ne[0] == 7);
+    const int* opts = (const int*)opt0->data;
+    enum ggml_pool_op op = opts[0];
+    const int k0 = opts[1];
+    const int k1 = opts[2];
+    const int s0 = opts[3];
+    const int s1 = opts[4];
+    const int p0 = opts[5];
+    const int p1 = opts[6];
+    GGML_ASSERT(p0 == 0);
+    GGML_ASSERT(p1 == 0); // padding not supported
+    GGML_ASSERT(k0 == s0);
+    GGML_ASSERT(k1 == s1); // only s = k supported
+
+    ggml_compute_forward_pool_2d_sk_p0(params, op, src0, k0, k1, dst);
+}
+
 
 // ggml_compute_forward_flash_attn
 
@@ -14794,6 +15058,14 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
             {
                 ggml_compute_forward_conv_2d(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor);
             } break;
+        case GGML_OP_POOL_1D:
+            {
+                ggml_compute_forward_pool_1d(params, tensor->src[0], tensor->src[1], tensor);
+            } break;
+        case GGML_OP_POOL_2D:
+            {
+                ggml_compute_forward_pool_2d(params, tensor->src[0], tensor->src[1], tensor);
+            } break;
         case GGML_OP_FLASH_ATTN:
             {
                 const int32_t t = ggml_get_i32_1d(tensor->src[3], 0);
@@ -15494,6 +15766,14 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
             {
                 GGML_ASSERT(false); // TODO: not implemented
             } break;
+        case GGML_OP_POOL_1D:
+            {
+                GGML_ASSERT(false); // TODO: not implemented
+            } break;
+        case GGML_OP_POOL_2D:
+            {
+                GGML_ASSERT(false); // TODO: not implemented
+            } break;
         case GGML_OP_FLASH_ATTN:
             {
                 struct ggml_tensor * flash_grad = NULL;
@@ -16315,6 +16595,11 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
 
                     work_size = MAX(work_size, cur);
                 } break;
+            case GGML_OP_POOL_1D:
+            case GGML_OP_POOL_2D:
+                {
+                    n_tasks = 1;
+                } break;
             case GGML_OP_FLASH_ATTN:
                 {
                     n_tasks = n_threads;
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 89c2aa6d..e787e307 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -256,6 +256,14 @@ add_executable(${TEST_TARGET} ${TEST_TARGET}.c)
 target_link_libraries(${TEST_TARGET} PRIVATE ggml)
 add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}>)
 
+#
+# test-pool
+
+set(TEST_TARGET test-pool)
+add_executable(${TEST_TARGET} ${TEST_TARGET}.c)
+target_link_libraries(${TEST_TARGET} PRIVATE ggml)
+add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}>)
+
 #
 # test-svd0 (arm/x86)
 
diff --git a/tests/test-pool.c b/tests/test-pool.c
new file mode 100644
index 00000000..14f831f1
--- /dev/null
+++ b/tests/test-pool.c
@@ -0,0 +1,142 @@
+#include "ggml/ggml.h"
+
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+struct ggml_context* make_ctx(void) {
+    struct ggml_init_params params = {
+        .mem_size = 2 * 1024 * 1024,
+    };
+
+    return ggml_init(params);
+}
+
+int main(int argc, const char** argv) {
+
+    float buf_f32[1024];
+    for (int i = 0; i < 1024; ++i) {
+        buf_f32[i] = (float)(i + 1);
+    }
+
+    // avg pool 1d
+    {
+        struct ggml_context* ctx = make_ctx();
+        struct ggml_tensor* t = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 10, 2);
+        memcpy(t->data, buf_f32, ggml_nbytes(t));
+
+        struct ggml_tensor* t_pooled = ggml_pool_1d(ctx, t, GGML_POOL_AVG, 3, 3, 0);
+        GGML_ASSERT(t_pooled->ne[0] == 3);
+        GGML_ASSERT(t_pooled->ne[1] == 2);
+        GGML_ASSERT(t_pooled->ne[2] == 1);
+
+        struct ggml_cgraph graph = ggml_build_forward(t_pooled);
+
+        ggml_graph_compute_with_ctx(ctx, &graph, 4);
+
+        const float* output = ggml_get_data_f32(t_pooled);
+        GGML_ASSERT(output[0] == 2);
+        GGML_ASSERT(output[1] == 5);
+        GGML_ASSERT(output[2] == 8);
+        GGML_ASSERT(output[3] == 12);
+        GGML_ASSERT(output[4] == 15);
+        GGML_ASSERT(output[5] == 18);
+
+        ggml_free(ctx);
+    }
+
+    // max pool 1d
+    {
+        struct ggml_context* ctx = make_ctx();
+        struct ggml_tensor* t = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 10, 2);
+        memcpy(t->data, buf_f32, ggml_nbytes(t));
+
+        struct ggml_tensor* t_pooled = ggml_pool_1d(ctx, t, GGML_POOL_MAX, 3, 3, 0);
+        GGML_ASSERT(t_pooled->ne[0] == 3);
+        GGML_ASSERT(t_pooled->ne[1] == 2);
+        GGML_ASSERT(t_pooled->ne[2] == 1);
+
+        struct ggml_cgraph graph = ggml_build_forward(t_pooled);
+
+        ggml_graph_compute_with_ctx(ctx, &graph, 4);
+
+        const float* output = ggml_get_data_f32(t_pooled);
+        GGML_ASSERT(output[0] == 3);
+        GGML_ASSERT(output[1] == 6);
+        GGML_ASSERT(output[2] == 9);
+        GGML_ASSERT(output[3] == 13);
+        GGML_ASSERT(output[4] == 16);
+        GGML_ASSERT(output[5] == 19);
+
+        ggml_free(ctx);
+    }
+
+    // avg pool 2d
+    {
+        struct ggml_context* ctx = make_ctx();
+        struct ggml_tensor* t = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 10, 10, 2);
+        memcpy(t->data, buf_f32, ggml_nbytes(t));
+
+        struct ggml_tensor* t_pooled = ggml_pool_2d(ctx, t, GGML_POOL_AVG, 3, 4, 3, 4, 0, 0);
+        GGML_ASSERT(t_pooled->ne[0] == 3);
+        GGML_ASSERT(t_pooled->ne[1] == 2);
+        GGML_ASSERT(t_pooled->ne[2] == 2);
+        GGML_ASSERT(t_pooled->ne[3] == 1);
+
+        struct ggml_cgraph graph = ggml_build_forward(t_pooled);
+
+        ggml_graph_compute_with_ctx(ctx, &graph, 4);
+
+        const float* output = ggml_get_data_f32(t_pooled);
+        GGML_ASSERT(output[0] == 17);
+        GGML_ASSERT(output[1] == 20);
+        GGML_ASSERT(output[2] == 23);
+        GGML_ASSERT(output[3] == 57);
+        GGML_ASSERT(output[4] == 60);
+        GGML_ASSERT(output[5] == 63);
+        GGML_ASSERT(output[6] == 117);
+        GGML_ASSERT(output[7] == 120);
+        GGML_ASSERT(output[8] == 123);
+        GGML_ASSERT(output[9] == 157);
+        GGML_ASSERT(output[10] == 160);
+        GGML_ASSERT(output[11] == 163);
+
+
+        ggml_free(ctx);
+    }
+
+    // max pool 2d
+    {
+        struct ggml_context* ctx = make_ctx();
+        struct ggml_tensor* t = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 10, 10, 2);
+        memcpy(t->data, buf_f32, ggml_nbytes(t));
+
+        struct ggml_tensor* t_pooled = ggml_pool_2d(ctx, t, GGML_POOL_MAX, 3, 4, 3, 4, 0, 0);
+        GGML_ASSERT(t_pooled->ne[0] == 3);
+        GGML_ASSERT(t_pooled->ne[1] == 2);
+        GGML_ASSERT(t_pooled->ne[2] == 2);
+        GGML_ASSERT(t_pooled->ne[3] == 1);
+
+        struct ggml_cgraph graph = ggml_build_forward(t_pooled);
+
+        ggml_graph_compute_with_ctx(ctx, &graph, 4);
+
+        const float* output = ggml_get_data_f32(t_pooled);
+        GGML_ASSERT(output[0] == 33);
+        GGML_ASSERT(output[1] == 36);
+        GGML_ASSERT(output[2] == 39);
+        GGML_ASSERT(output[3] == 73);
+        GGML_ASSERT(output[4] == 76);
+        GGML_ASSERT(output[5] == 79);
+        GGML_ASSERT(output[6] == 133);
+        GGML_ASSERT(output[7] == 136);
+        GGML_ASSERT(output[8] == 139);
+        GGML_ASSERT(output[9] == 173);
+        GGML_ASSERT(output[10] == 176);
+        GGML_ASSERT(output[11] == 179);
+
+        ggml_free(ctx);
+    }
+
+    return 0;
+}