ggml: add support for float16 input tensors in pooling operations (#895)

author Ivan Filipov <redacted>

Mon, 22 Jul 2024 11:32:02 +0000 (14:32 +0300)

committer GitHub <redacted>

Mon, 22 Jul 2024 11:32:02 +0000 (14:32 +0300)
author Ivan Filipov <redacted>
Mon, 22 Jul 2024 11:32:02 +0000 (14:32 +0300)
committer GitHub <redacted>
Mon, 22 Jul 2024 11:32:02 +0000 (14:32 +0300)
diff --git a/src/ggml.c b/src/ggml.c

index ae7bee5e3e88a67c853b38089cb923fc42bc71f0..46beec5ef46171af400af3487e98e552a1ad4092 100644 (file)
--- a/src/ggml.c
+++ b/src/ggml.c
@@ -14579,7 +14579,7 @@ static void ggml_compute_forward_pool_1d_sk_p0(
  
      const struct ggml_tensor * src = dst->src[0];
  
-    assert(src->type == GGML_TYPE_F32);
+    assert(src->type == GGML_TYPE_F32 || src->type == GGML_TYPE_F16);
  
      if (params->ith != 0) {
          return;
@@ -14592,10 +14592,8 @@ static void ggml_compute_forward_pool_1d_sk_p0(
      const int64_t rs = dst->ne[0];
  
      while (cdata < data_end) {
-        const float * const srow = (const float *)cdata;
-
+        const void * srow = (const void *)cdata;
          int j = 0;
-
          for (int64_t i = 0; i < rs; ++i) {
              switch (op) {
                  case GGML_OP_POOL_AVG:   drow[i] = 0;        break;
@@ -14603,10 +14601,11 @@ static void ggml_compute_forward_pool_1d_sk_p0(
                  case GGML_OP_POOL_COUNT: GGML_ASSERT(false); break;
              }
              for (int ki = 0; ki < k; ++ki) {
+                const float srow_j = (src->type == GGML_TYPE_F32) ? ((const float*)srow)[j] : GGML_FP16_TO_FP32(((const ggml_fp16_t*)srow)[j]);
                  switch (op) {
-                    case GGML_OP_POOL_AVG:                          drow[i] += srow[j]; break;
-                    case GGML_OP_POOL_MAX:   if (srow[j] > drow[i]) drow[i]  = srow[j]; break;
-                    case GGML_OP_POOL_COUNT:                        GGML_ASSERT(false); break;
+                    case GGML_OP_POOL_AVG:                         drow[i] += srow_j; break;
+                    case GGML_OP_POOL_MAX:   if (srow_j > drow[i]) drow[i]  = srow_j; break;
+                    case GGML_OP_POOL_COUNT:                       GGML_ASSERT(false); break;
                  }
                  ++j;
              }
@@ -14647,7 +14646,7 @@ static void ggml_compute_forward_pool_2d(
  
      const struct ggml_tensor * src = dst->src[0];
  
-    GGML_ASSERT(src->type == GGML_TYPE_F32);
+    assert(src->type == GGML_TYPE_F32 || src->type == GGML_TYPE_F16);
  
      if (params->ith != 0) {
          return;
@@ -14690,14 +14689,15 @@ static void ggml_compute_forward_pool_2d(
  
                  for (int ky = 0; ky < k1; ++ky) {
                      if (iy + ky < 0 || iy + ky >= src->ne[1]) continue;
-                    const float * const srow = (const float *)(cdata + src->nb[1] * (iy + ky));
+                    const void * srow = (const void *)(cdata + src->nb[1] * (iy + ky));
                      for (int kx = 0; kx < k0; ++kx) {
                          int j = ix + kx;
                          if (j < 0 || j >= src->ne[0]) continue;
+                        const float srow_j = (src->type == GGML_TYPE_F32) ? ((const float*)srow)[j] : GGML_FP16_TO_FP32(((const ggml_fp16_t*)srow)[j]);
                          switch (op) {
-                            case GGML_OP_POOL_AVG:                     *out += srow[j]; break;
-                            case GGML_OP_POOL_MAX: if (srow[j] > *out) *out  = srow[j]; break;
-                            case GGML_OP_POOL_COUNT:                GGML_ASSERT(false); break;
+                            case GGML_OP_POOL_AVG:                     *out += srow_j; break;
+                            case GGML_OP_POOL_MAX: if (srow_j > *out)  *out  = srow_j; break;
+                            case GGML_OP_POOL_COUNT:               GGML_ASSERT(false); break;
                          }
                      }
                  }
diff --git a/tests/test-pool.c b/tests/test-pool.c

index d1252927be07e9f45a9742e9c21603d20c394e69..f943dc61de03a26bf59c80bfb6a22b9bde49ff96 100644 (file)
--- a/tests/test-pool.c
+++ b/tests/test-pool.c
@@ -15,11 +15,13 @@ struct ggml_context* make_ctx(void) {
  int main(int argc, const char** argv) {
  
      float buf_f32[1024];
+    ggml_fp16_t buf_f16[1024];
      for (int i = 0; i < 1024; ++i) {
          buf_f32[i] = (float)(i + 1);
+        buf_f16[i] = ggml_fp32_to_fp16(buf_f32[i]);
      }
  
-    // avg pool 1d
+    // avg pool 1d - Float 32
      {
          struct ggml_context * ctx = make_ctx();
          struct ggml_tensor * t = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 10, 2);
@@ -47,7 +49,35 @@ int main(int argc, const char** argv) {
          ggml_free(ctx);
      }
  
-    // max pool 1d
+    // avg pool 1d - Float 16
+    {
+        struct ggml_context * ctx = make_ctx();
+        struct ggml_tensor * t = ggml_new_tensor_2d(ctx, GGML_TYPE_F16, 10, 2);
+        memcpy(t->data, buf_f16, ggml_nbytes(t));
+
+        struct ggml_tensor * t_pooled = ggml_pool_1d(ctx, t, GGML_OP_POOL_AVG, 3, 3, 0);
+        GGML_ASSERT(t_pooled->ne[0] == 3);
+        GGML_ASSERT(t_pooled->ne[1] == 2);
+        GGML_ASSERT(t_pooled->ne[2] == 1);
+
+        struct ggml_cgraph * graph = ggml_new_graph(ctx);
+        ggml_build_forward_expand(graph, t_pooled);
+
+        ggml_graph_compute_with_ctx(ctx, graph, 4);
+
+        const float * output = ggml_get_data_f32(t_pooled);
+
+        GGML_ASSERT(output[0] == 2);
+        GGML_ASSERT(output[1] == 5);
+        GGML_ASSERT(output[2] == 8);
+        GGML_ASSERT(output[3] == 12);
+        GGML_ASSERT(output[4] == 15);
+        GGML_ASSERT(output[5] == 18);
+
+        ggml_free(ctx);
+    }
+
+    // max pool 1d - Float 32
      {
          struct ggml_context * ctx = make_ctx();
          struct ggml_tensor * t = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 10, 2);
@@ -74,7 +104,34 @@ int main(int argc, const char** argv) {
          ggml_free(ctx);
      }
  
-    // avg pool 2d
+    // max pool 1d - Float 16
+    {
+        struct ggml_context * ctx = make_ctx();
+        struct ggml_tensor * t = ggml_new_tensor_2d(ctx, GGML_TYPE_F16, 10, 2);
+        memcpy(t->data, buf_f16, ggml_nbytes(t));
+
+        struct ggml_tensor * t_pooled = ggml_pool_1d(ctx, t, GGML_OP_POOL_MAX, 3, 3, 0);
+        GGML_ASSERT(t_pooled->ne[0] == 3);
+        GGML_ASSERT(t_pooled->ne[1] == 2);
+        GGML_ASSERT(t_pooled->ne[2] == 1);
+
+        struct ggml_cgraph * graph = ggml_new_graph(ctx);
+        ggml_build_forward_expand(graph, t_pooled);
+
+        ggml_graph_compute_with_ctx(ctx, graph, 4);
+
+        const float * output = ggml_get_data_f32(t_pooled);
+        GGML_ASSERT(output[0] == 3);
+        GGML_ASSERT(output[1] == 6);
+        GGML_ASSERT(output[2] == 9);
+        GGML_ASSERT(output[3] == 13);
+        GGML_ASSERT(output[4] == 16);
+        GGML_ASSERT(output[5] == 19);
+
+        ggml_free(ctx);
+    }
+
+    // avg pool 2d - Float 32
      {
          struct ggml_context * ctx = make_ctx();
          struct ggml_tensor * t = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 10, 10, 2);
@@ -109,7 +166,42 @@ int main(int argc, const char** argv) {
          ggml_free(ctx);
      }
  
-    // max pool 2d
+    // avg pool 2d - Float 16
+    {
+        struct ggml_context * ctx = make_ctx();
+        struct ggml_tensor * t = ggml_new_tensor_3d(ctx, GGML_TYPE_F16, 10, 10, 2);
+        memcpy(t->data, buf_f16, ggml_nbytes(t));
+
+        struct ggml_tensor * t_pooled = ggml_pool_2d(ctx, t, GGML_OP_POOL_AVG, 3, 4, 3, 4, 0, 0);
+        GGML_ASSERT(t_pooled->ne[0] == 3);
+        GGML_ASSERT(t_pooled->ne[1] == 2);
+        GGML_ASSERT(t_pooled->ne[2] == 2);
+        GGML_ASSERT(t_pooled->ne[3] == 1);
+
+        struct ggml_cgraph * graph = ggml_new_graph(ctx);
+        ggml_build_forward_expand(graph, t_pooled);
+
+        ggml_graph_compute_with_ctx(ctx, graph, 4);
+
+        const float * output = ggml_get_data_f32(t_pooled);
+        GGML_ASSERT(output[0] == 17);
+        GGML_ASSERT(output[1] == 20);
+        GGML_ASSERT(output[2] == 23);
+        GGML_ASSERT(output[3] == 57);
+        GGML_ASSERT(output[4] == 60);
+        GGML_ASSERT(output[5] == 63);
+        GGML_ASSERT(output[6] == 117);
+        GGML_ASSERT(output[7] == 120);
+        GGML_ASSERT(output[8] == 123);
+        GGML_ASSERT(output[9] == 157);
+        GGML_ASSERT(output[10] == 160);
+        GGML_ASSERT(output[11] == 163);
+
+
+        ggml_free(ctx);
+    }
+
+    // max pool 2d - Float 32
      {
          struct ggml_context * ctx = make_ctx();
          struct ggml_tensor * t = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 10, 10, 2);
@@ -143,5 +235,39 @@ int main(int argc, const char** argv) {
          ggml_free(ctx);
      }
  
+    // max pool 2d - Float 16
+    {
+        struct ggml_context * ctx = make_ctx();
+        struct ggml_tensor * t = ggml_new_tensor_3d(ctx, GGML_TYPE_F16, 10, 10, 2);
+        memcpy(t->data, buf_f16, ggml_nbytes(t));
+
+        struct ggml_tensor * t_pooled = ggml_pool_2d(ctx, t, GGML_OP_POOL_MAX, 3, 4, 3, 4, 0, 0);
+        GGML_ASSERT(t_pooled->ne[0] == 3);
+        GGML_ASSERT(t_pooled->ne[1] == 2);
+        GGML_ASSERT(t_pooled->ne[2] == 2);
+        GGML_ASSERT(t_pooled->ne[3] == 1);
+
+        struct ggml_cgraph * graph = ggml_new_graph(ctx);
+        ggml_build_forward_expand(graph, t_pooled);
+
+        ggml_graph_compute_with_ctx(ctx, graph, 4);
+
+        const float * output = ggml_get_data_f32(t_pooled);
+        GGML_ASSERT(output[0] == 33);
+        GGML_ASSERT(output[1] == 36);
+        GGML_ASSERT(output[2] == 39);
+        GGML_ASSERT(output[3] == 73);
+        GGML_ASSERT(output[4] == 76);
+        GGML_ASSERT(output[5] == 79);
+        GGML_ASSERT(output[6] == 133);
+        GGML_ASSERT(output[7] == 136);
+        GGML_ASSERT(output[8] == 139);
+        GGML_ASSERT(output[9] == 173);
+        GGML_ASSERT(output[10] == 176);
+        GGML_ASSERT(output[11] == 179);
+
+        ggml_free(ctx);
+    }
+
      return 0;
  }
author	Ivan Filipov <redacted>
	Mon, 22 Jul 2024 11:32:02 +0000 (14:32 +0300)
committer	GitHub <redacted>
	Mon, 22 Jul 2024 11:32:02 +0000 (14:32 +0300)
src/ggml.c		patch | blob | history
tests/test-pool.c		patch | blob | history