From: Ivan Filipov
Date: Mon, 22 Jul 2024 11:32:02 +0000 (+0300)
Subject: ggml: add support for float16 input tensors in pooling operations (#895)
X-Git-Tag: upstream/0.0.1642~530
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=2560b8ddc6c106c1334b5d6350632718e1c87590;p=pkg%2Fggml%2Fsources%2Fggml

ggml: add support for float16 input tensors in pooling operations (#895)

* Add support for float16 tensors in 1d pooling operations

* Add support for float16 input tensors in 2d pooling operations

* code cleanup
remove unnecessary casting during srow ptr initialization

---------

Co-authored-by: vanaka11
---

diff --git a/src/ggml.c b/src/ggml.c
index ae7bee5e..46beec5e 100644
--- a/src/ggml.c
+++ b/src/ggml.c
@@ -14579,7 +14579,7 @@ static void ggml_compute_forward_pool_1d_sk_p0(

     const struct ggml_tensor * src = dst->src[0];

-    assert(src->type == GGML_TYPE_F32);
+    assert(src->type == GGML_TYPE_F32 || src->type == GGML_TYPE_F16);

     if (params->ith != 0) {
         return;
@@ -14592,10 +14592,8 @@ static void ggml_compute_forward_pool_1d_sk_p0(
     const int64_t rs = dst->ne[0];

     while (cdata < data_end) {
-        const float * const srow = (const float *)cdata;
-
+        const void * srow = (const void *)cdata;
         int j = 0;
-
         for (int64_t i = 0; i < rs; ++i) {
             switch (op) {
                 case GGML_OP_POOL_AVG:   drow[i] = 0;        break;
@@ -14603,10 +14601,11 @@ static void ggml_compute_forward_pool_1d_sk_p0(
                 case GGML_OP_POOL_COUNT: GGML_ASSERT(false); break;
             }
             for (int ki = 0; ki < k; ++ki) {
+                const float srow_j = (src->type == GGML_TYPE_F32) ? ((const float*)srow)[j] : GGML_FP16_TO_FP32(((const ggml_fp16_t*)srow)[j]);
                 switch (op) {
-                    case GGML_OP_POOL_AVG:                          drow[i] += srow[j]; break;
-                    case GGML_OP_POOL_MAX:   if (srow[j] > drow[i]) drow[i]  = srow[j]; break;
-                    case GGML_OP_POOL_COUNT:                        GGML_ASSERT(false); break;
+                    case GGML_OP_POOL_AVG:                          drow[i] += srow_j; break;
+                    case GGML_OP_POOL_MAX:   if (srow_j > drow[i])  drow[i]  = srow_j; break;
+                    case GGML_OP_POOL_COUNT:                        GGML_ASSERT(false); break;
                 }
                 ++j;
             }
@@ -14647,7 +14646,7 @@ static void ggml_compute_forward_pool_2d(

     const struct ggml_tensor * src = dst->src[0];

-    GGML_ASSERT(src->type == GGML_TYPE_F32);
+    assert(src->type == GGML_TYPE_F32 || src->type == GGML_TYPE_F16);

     if (params->ith != 0) {
         return;
@@ -14690,14 +14689,15 @@ static void ggml_compute_forward_pool_2d(
                 for (int ky = 0; ky < k1; ++ky) {
                     if (iy + ky < 0 || iy + ky >= src->ne[1]) continue;
-                    const float * const srow = (const float *)(cdata + src->nb[1] * (iy + ky));
+                    const void * srow = (const void *)(cdata + src->nb[1] * (iy + ky));
                     for (int kx = 0; kx < k0; ++kx) {
                         int j = ix + kx;
                         if (j < 0 || j >= src->ne[0]) continue;
+                        const float srow_j = (src->type == GGML_TYPE_F32) ? ((const float*)srow)[j] : GGML_FP16_TO_FP32(((const ggml_fp16_t*)srow)[j]);
                         switch (op) {
-                            case GGML_OP_POOL_AVG:                     *out += srow[j]; break;
-                            case GGML_OP_POOL_MAX: if (srow[j] > *out)  *out  = srow[j]; break;
-                            case GGML_OP_POOL_COUNT:                    GGML_ASSERT(false); break;
+                            case GGML_OP_POOL_AVG:                     *out += srow_j; break;
+                            case GGML_OP_POOL_MAX: if (srow_j > *out)  *out  = srow_j; break;
+                            case GGML_OP_POOL_COUNT:                    GGML_ASSERT(false); break;
                         }
                     }
                 }
diff --git a/tests/test-pool.c b/tests/test-pool.c
index d1252927..f943dc61 100644
--- a/tests/test-pool.c
+++ b/tests/test-pool.c
@@ -15,11 +15,13 @@ struct ggml_context* make_ctx(void) {

 int main(int argc, const char** argv) {
     float buf_f32[1024];
+    ggml_fp16_t buf_f16[1024];
     for (int i = 0; i < 1024; ++i) {
         buf_f32[i] = (float)(i + 1);
+        buf_f16[i] = ggml_fp32_to_fp16(buf_f32[i]);
     }

-    // avg pool 1d
+    // avg pool 1d - Float 32
     {
         struct ggml_context * ctx = make_ctx();
         struct ggml_tensor * t = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 10, 2);
@@ -47,7 +49,35 @@ int main(int argc, const char** argv) {
         ggml_free(ctx);
     }

-    // max pool 1d
+    // avg pool 1d - Float 16
+    {
+        struct ggml_context * ctx = make_ctx();
+        struct ggml_tensor * t = ggml_new_tensor_2d(ctx, GGML_TYPE_F16, 10, 2);
+        memcpy(t->data, buf_f16, ggml_nbytes(t));
+
+        struct ggml_tensor * t_pooled = ggml_pool_1d(ctx, t, GGML_OP_POOL_AVG, 3, 3, 0);
+        GGML_ASSERT(t_pooled->ne[0] == 3);
+        GGML_ASSERT(t_pooled->ne[1] == 2);
+        GGML_ASSERT(t_pooled->ne[2] == 1);
+
+        struct ggml_cgraph * graph = ggml_new_graph(ctx);
+        ggml_build_forward_expand(graph, t_pooled);
+
+        ggml_graph_compute_with_ctx(ctx, graph, 4);
+
+        const float * output = ggml_get_data_f32(t_pooled);
+
+        GGML_ASSERT(output[0] == 2);
+        GGML_ASSERT(output[1] == 5);
+        GGML_ASSERT(output[2] == 8);
+        GGML_ASSERT(output[3] == 12);
+        GGML_ASSERT(output[4] == 15);
+        GGML_ASSERT(output[5] == 18);
+
+        ggml_free(ctx);
+    }
+
+    // max pool 1d - Float 32
     {
         struct ggml_context * ctx = make_ctx();
         struct ggml_tensor * t = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 10, 2);
@@ -74,7 +104,34 @@ int main(int argc, const char** argv) {
         ggml_free(ctx);
     }

-    // avg pool 2d
+    // max pool 1d - Float 16
+    {
+        struct ggml_context * ctx = make_ctx();
+        struct ggml_tensor * t = ggml_new_tensor_2d(ctx, GGML_TYPE_F16, 10, 2);
+        memcpy(t->data, buf_f16, ggml_nbytes(t));
+
+        struct ggml_tensor * t_pooled = ggml_pool_1d(ctx, t, GGML_OP_POOL_MAX, 3, 3, 0);
+        GGML_ASSERT(t_pooled->ne[0] == 3);
+        GGML_ASSERT(t_pooled->ne[1] == 2);
+        GGML_ASSERT(t_pooled->ne[2] == 1);
+
+        struct ggml_cgraph * graph = ggml_new_graph(ctx);
+        ggml_build_forward_expand(graph, t_pooled);
+
+        ggml_graph_compute_with_ctx(ctx, graph, 4);
+
+        const float * output = ggml_get_data_f32(t_pooled);
+        GGML_ASSERT(output[0] == 3);
+        GGML_ASSERT(output[1] == 6);
+        GGML_ASSERT(output[2] == 9);
+        GGML_ASSERT(output[3] == 13);
+        GGML_ASSERT(output[4] == 16);
+        GGML_ASSERT(output[5] == 19);
+
+        ggml_free(ctx);
+    }
+
+    // avg pool 2d - Float 32
     {
         struct ggml_context * ctx = make_ctx();
         struct ggml_tensor * t = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 10, 10, 2);
@@ -109,7 +166,42 @@ int main(int argc, const char** argv) {
         ggml_free(ctx);
     }

-    // max pool 2d
+    // avg pool 2d - Float 16
+    {
+        struct ggml_context * ctx = make_ctx();
+        struct ggml_tensor * t = ggml_new_tensor_3d(ctx, GGML_TYPE_F16, 10, 10, 2);
+        memcpy(t->data, buf_f16, ggml_nbytes(t));
+
+        struct ggml_tensor * t_pooled = ggml_pool_2d(ctx, t, GGML_OP_POOL_AVG, 3, 4, 3, 4, 0, 0);
+        GGML_ASSERT(t_pooled->ne[0] == 3);
+        GGML_ASSERT(t_pooled->ne[1] == 2);
+        GGML_ASSERT(t_pooled->ne[2] == 2);
+        GGML_ASSERT(t_pooled->ne[3] == 1);
+
+        struct ggml_cgraph * graph = ggml_new_graph(ctx);
+        ggml_build_forward_expand(graph, t_pooled);
+
+        ggml_graph_compute_with_ctx(ctx, graph, 4);
+
+        const float * output = ggml_get_data_f32(t_pooled);
+        GGML_ASSERT(output[0] == 17);
+        GGML_ASSERT(output[1] == 20);
+        GGML_ASSERT(output[2] == 23);
+        GGML_ASSERT(output[3] == 57);
+        GGML_ASSERT(output[4] == 60);
+        GGML_ASSERT(output[5] == 63);
+        GGML_ASSERT(output[6] == 117);
+        GGML_ASSERT(output[7] == 120);
+        GGML_ASSERT(output[8] == 123);
+        GGML_ASSERT(output[9] == 157);
+        GGML_ASSERT(output[10] == 160);
+        GGML_ASSERT(output[11] == 163);
+
+
+        ggml_free(ctx);
+    }
+
+    // max pool 2d - Float 32
     {
         struct ggml_context * ctx = make_ctx();
         struct ggml_tensor * t = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 10, 10, 2);
@@ -143,5 +235,39 @@ int main(int argc, const char** argv) {
         ggml_free(ctx);
     }

+    // max pool 2d - Float 16
+    {
+        struct ggml_context * ctx = make_ctx();
+        struct ggml_tensor * t = ggml_new_tensor_3d(ctx, GGML_TYPE_F16, 10, 10, 2);
+        memcpy(t->data, buf_f16, ggml_nbytes(t));
+
+        struct ggml_tensor * t_pooled = ggml_pool_2d(ctx, t, GGML_OP_POOL_MAX, 3, 4, 3, 4, 0, 0);
+        GGML_ASSERT(t_pooled->ne[0] == 3);
+        GGML_ASSERT(t_pooled->ne[1] == 2);
+        GGML_ASSERT(t_pooled->ne[2] == 2);
+        GGML_ASSERT(t_pooled->ne[3] == 1);
+
+        struct ggml_cgraph * graph = ggml_new_graph(ctx);
+        ggml_build_forward_expand(graph, t_pooled);
+
+        ggml_graph_compute_with_ctx(ctx, graph, 4);
+
+        const float * output = ggml_get_data_f32(t_pooled);
+        GGML_ASSERT(output[0] == 33);
+        GGML_ASSERT(output[1] == 36);
+        GGML_ASSERT(output[2] == 39);
+        GGML_ASSERT(output[3] == 73);
+        GGML_ASSERT(output[4] == 76);
+        GGML_ASSERT(output[5] == 79);
+        GGML_ASSERT(output[6] == 133);
+        GGML_ASSERT(output[7] == 136);
+        GGML_ASSERT(output[8] == 139);
+        GGML_ASSERT(output[9] == 173);
+        GGML_ASSERT(output[10] == 176);
+        GGML_ASSERT(output[11] == 179);
+
+        ggml_free(ctx);
+    }
+
     return 0;
 }
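
The core of the change is the per-element load: both pooling kernels now walk the source row through a const void pointer and widen each element to F32 before accumulating, branching on src->type once per element. If that duplicated conditional ever needs to cover more source types, one possible cleanup is to hoist it into a small helper. The sketch below is not part of this commit and the helper name is hypothetical; it uses only the macros and types the patch itself relies on (GGML_FP16_TO_FP32, ggml_fp16_t):

    // Hypothetical helper (not in the patch): fetch element j of a source row
    // that is either F32 or F16 and return it widened to F32, so both pooling
    // kernels could share one dispatch point instead of duplicating the ternary.
    static inline float pool_srow_to_f32(const void * srow, enum ggml_type type, int j) {
        return type == GGML_TYPE_F32
            ? ((const float *) srow)[j]
            : GGML_FP16_TO_FP32(((const ggml_fp16_t *) srow)[j]);
    }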
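
For exercising the new F16 path outside the test suite, here is a minimal standalone driver assembled from the same public calls the updated tests use (ggml_pool_1d, ggml_new_graph, ggml_graph_compute_with_ctx, ggml_get_data_f32). It is a sketch, not a reference program: the context size is an arbitrary assumption, and error handling is omitted.

    // Minimal sketch: average-pool an F16 tensor end to end through the public API.
    #include <stdio.h>
    #include "ggml.h"

    int main(void) {
        struct ggml_init_params params = {
            /*.mem_size   =*/ 16*1024*1024,  // assumed ample for this tiny graph
            /*.mem_buffer =*/ NULL,
            /*.no_alloc   =*/ false,
        };
        struct ggml_context * ctx = ggml_init(params);

        // One row of 6 values, 1..6, stored as F16.
        struct ggml_tensor * t = ggml_new_tensor_1d(ctx, GGML_TYPE_F16, 6);
        ggml_fp16_t * data = (ggml_fp16_t *) t->data;
        for (int i = 0; i < 6; ++i) {
            data[i] = ggml_fp32_to_fp16((float)(i + 1));
        }

        // k0 = 3, s0 = 3, p0 = 0: output is [avg(1,2,3), avg(4,5,6)] = [2, 5].
        struct ggml_tensor * pooled = ggml_pool_1d(ctx, t, GGML_OP_POOL_AVG, 3, 3, 0);

        struct ggml_cgraph * graph = ggml_new_graph(ctx);
        ggml_build_forward_expand(graph, pooled);
        ggml_graph_compute_with_ctx(ctx, graph, 1);

        // As in the tests above, the pooled destination is F32 even for F16 input.
        const float * out = ggml_get_data_f32(pooled);
        printf("%.1f %.1f\n", out[0], out[1]);  // expect: 2.0 5.0

        ggml_free(ctx);
        return 0;
    }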