From: Ivan Filipov
Date: Mon, 22 Jul 2024 11:32:02 +0000 (+0300)
Subject: ggml: add support for float16 input tensors in pooling operations (#895)
X-Git-Tag: upstream/0.0.1642~530
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=2560b8ddc6c106c1334b5d6350632718e1c87590;p=pkg%2Fggml%2Fsources%2Fggml

ggml: add support for float16 input tensors in pooling operations (#895)

* Add support for float16 tensors in 1d pooling operations

* Add support for float16 input tensors in 2d pooling operations

* code cleanup
remove unnecessary casting during srow ptr initialization

---------

Co-authored-by: vanaka11
---

diff --git a/src/ggml.c b/src/ggml.c
index ae7bee5e..46beec5e 100644
--- a/src/ggml.c
+++ b/src/ggml.c
@@ -14579,7 +14579,7 @@ static void ggml_compute_forward_pool_1d_sk_p0(

     const struct ggml_tensor * src = dst->src[0];

-    assert(src->type == GGML_TYPE_F32);
+    assert(src->type == GGML_TYPE_F32 || src->type == GGML_TYPE_F16);

     if (params->ith != 0) {
         return;
@@ -14592,10 +14592,8 @@ static void ggml_compute_forward_pool_1d_sk_p0(
     const int64_t rs = dst->ne[0];

     while (cdata < data_end) {
-        const float * const srow = (const float *)cdata;
-
+        const void * srow = (const void *)cdata;
         int j = 0;
-
         for (int64_t i = 0; i < rs; ++i) {
             switch (op) {
                 case GGML_OP_POOL_AVG:   drow[i] = 0;        break;
@@ -14603,10 +14601,11 @@ static void ggml_compute_forward_pool_1d_sk_p0(
                 case GGML_OP_POOL_COUNT: GGML_ASSERT(false); break;
             }
             for (int ki = 0; ki < k; ++ki) {
+                const float srow_j = (src->type == GGML_TYPE_F32) ? ((const float*)srow)[j] : GGML_FP16_TO_FP32(((const ggml_fp16_t*)srow)[j]);
                 switch (op) {
-                    case GGML_OP_POOL_AVG:                          drow[i] += srow[j]; break;
-                    case GGML_OP_POOL_MAX:   if (srow[j] > drow[i]) drow[i]  = srow[j]; break;
-                    case GGML_OP_POOL_COUNT:                        GGML_ASSERT(false); break;
+                    case GGML_OP_POOL_AVG:                          drow[i] += srow_j; break;
+                    case GGML_OP_POOL_MAX:   if (srow_j > drow[i])  drow[i]  = srow_j; break;
+                    case GGML_OP_POOL_COUNT:                        GGML_ASSERT(false); break;
                 }
                 ++j;
             }
@@ -14647,7 +14646,7 @@ static void ggml_compute_forward_pool_2d(

     const struct ggml_tensor * src = dst->src[0];

-    GGML_ASSERT(src->type == GGML_TYPE_F32);
+    assert(src->type == GGML_TYPE_F32 || src->type == GGML_TYPE_F16);

     if (params->ith != 0) {
         return;
@@ -14690,14 +14689,15 @@ static void ggml_compute_forward_pool_2d(
                 for (int ky = 0; ky < k1; ++ky) {
                     if (iy + ky < 0 || iy + ky >= src->ne[1]) continue;
-                    const float * const srow = (const float *)(cdata + src->nb[1] * (iy + ky));
+                    const void * srow = (const void *)(cdata + src->nb[1] * (iy + ky));
                     for (int kx = 0; kx < k0; ++kx) {
                         int j = ix + kx;
                         if (j < 0 || j >= src->ne[0]) continue;
+                        const float srow_j = (src->type == GGML_TYPE_F32) ? ((const float*)srow)[j] : GGML_FP16_TO_FP32(((const ggml_fp16_t*)srow)[j]);
                         switch (op) {
-                            case GGML_OP_POOL_AVG:                     *out += srow[j]; break;
-                            case GGML_OP_POOL_MAX: if (srow[j] > *out)  *out  = srow[j]; break;
-                            case GGML_OP_POOL_COUNT:                    GGML_ASSERT(false); break;
+                            case GGML_OP_POOL_AVG:                     *out += srow_j; break;
+                            case GGML_OP_POOL_MAX: if (srow_j > *out)  *out  = srow_j; break;
+                            case GGML_OP_POOL_COUNT:                    GGML_ASSERT(false); break;
                         }
                     }
                 }
diff --git a/tests/test-pool.c b/tests/test-pool.c
index d1252927..f943dc61 100644
--- a/tests/test-pool.c
+++ b/tests/test-pool.c
@@ -15,11 +15,13 @@ struct ggml_context* make_ctx(void) {

 int main(int argc, const char** argv) {
     float buf_f32[1024];
+    ggml_fp16_t buf_f16[1024];
     for (int i = 0; i < 1024; ++i) {
         buf_f32[i] = (float)(i + 1);
+        buf_f16[i] = ggml_fp32_to_fp16(buf_f32[i]);
     }

-    // avg pool 1d
+    // avg pool 1d - Float 32
     {
         struct ggml_context * ctx = make_ctx();
         struct ggml_tensor * t = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 10, 2);
@@ -47,7 +49,35 @@ int main(int argc, const char** argv) {
         ggml_free(ctx);
     }

-    // max pool 1d
+    // avg pool 1d - Float 16
+    {
+        struct ggml_context * ctx = make_ctx();
+        struct ggml_tensor * t = ggml_new_tensor_2d(ctx, GGML_TYPE_F16, 10, 2);
+        memcpy(t->data, buf_f16, ggml_nbytes(t));
+
+        struct ggml_tensor * t_pooled = ggml_pool_1d(ctx, t, GGML_OP_POOL_AVG, 3, 3, 0);
+        GGML_ASSERT(t_pooled->ne[0] == 3);
+        GGML_ASSERT(t_pooled->ne[1] == 2);
+        GGML_ASSERT(t_pooled->ne[2] == 1);
+
+        struct ggml_cgraph * graph = ggml_new_graph(ctx);
+        ggml_build_forward_expand(graph, t_pooled);
+
+        ggml_graph_compute_with_ctx(ctx, graph, 4);
+
+        const float * output = ggml_get_data_f32(t_pooled);
+
+        GGML_ASSERT(output[0] == 2);
+        GGML_ASSERT(output[1] == 5);
+        GGML_ASSERT(output[2] == 8);
+        GGML_ASSERT(output[3] == 12);
+        GGML_ASSERT(output[4] == 15);
+        GGML_ASSERT(output[5] == 18);
+
+        ggml_free(ctx);
+    }
+
+    // max pool 1d - Float 32
     {
         struct ggml_context * ctx = make_ctx();
         struct ggml_tensor * t = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 10, 2);
@@ -74,7 +104,34 @@ int main(int argc, const char** argv) {
         ggml_free(ctx);
     }

-    // avg pool 2d
+    // max pool 1d - Float 16
+    {
+        struct ggml_context * ctx = make_ctx();
+        struct ggml_tensor * t = ggml_new_tensor_2d(ctx, GGML_TYPE_F16, 10, 2);
+        memcpy(t->data, buf_f16, ggml_nbytes(t));
+
+        struct ggml_tensor * t_pooled = ggml_pool_1d(ctx, t, GGML_OP_POOL_MAX, 3, 3, 0);
+        GGML_ASSERT(t_pooled->ne[0] == 3);
+        GGML_ASSERT(t_pooled->ne[1] == 2);
+        GGML_ASSERT(t_pooled->ne[2] == 1);
+
+        struct ggml_cgraph * graph = ggml_new_graph(ctx);
+        ggml_build_forward_expand(graph, t_pooled);
+
+        ggml_graph_compute_with_ctx(ctx, graph, 4);
+
+        const float * output = ggml_get_data_f32(t_pooled);
+        GGML_ASSERT(output[0] == 3);
+        GGML_ASSERT(output[1] == 6);
+        GGML_ASSERT(output[2] == 9);
+        GGML_ASSERT(output[3] == 13);
+        GGML_ASSERT(output[4] == 16);
+        GGML_ASSERT(output[5] == 19);
+
+        ggml_free(ctx);
+    }
+
+    // avg pool 2d - Float 32
     {
         struct ggml_context * ctx = make_ctx();
         struct ggml_tensor * t = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 10, 10, 2);
@@ -109,7 +166,42 @@ int main(int argc, const char** argv) {
         ggml_free(ctx);
     }

-    // max pool 2d
+    // avg pool 2d - Float 16
+    {
+        struct ggml_context * ctx = make_ctx();
+        struct ggml_tensor * t = ggml_new_tensor_3d(ctx, GGML_TYPE_F16, 10, 10, 2);
+        memcpy(t->data, buf_f16, ggml_nbytes(t));
+
+        struct ggml_tensor * t_pooled = ggml_pool_2d(ctx, t, GGML_OP_POOL_AVG, 3, 4, 3, 4, 0, 0);
+        GGML_ASSERT(t_pooled->ne[0] == 3);
+        GGML_ASSERT(t_pooled->ne[1] == 2);
+        GGML_ASSERT(t_pooled->ne[2] == 2);
+        GGML_ASSERT(t_pooled->ne[3] == 1);
+
+        struct ggml_cgraph * graph = ggml_new_graph(ctx);
+        ggml_build_forward_expand(graph, t_pooled);
+
+        ggml_graph_compute_with_ctx(ctx, graph, 4);
+
+        const float * output = ggml_get_data_f32(t_pooled);
+        GGML_ASSERT(output[0] == 17);
+        GGML_ASSERT(output[1] == 20);
+        GGML_ASSERT(output[2] == 23);
+        GGML_ASSERT(output[3] == 57);
+        GGML_ASSERT(output[4] == 60);
+        GGML_ASSERT(output[5] == 63);
+        GGML_ASSERT(output[6] == 117);
+        GGML_ASSERT(output[7] == 120);
+        GGML_ASSERT(output[8] == 123);
+        GGML_ASSERT(output[9] == 157);
+        GGML_ASSERT(output[10] == 160);
+        GGML_ASSERT(output[11] == 163);
+
+
+        ggml_free(ctx);
+    }
+
+    // max pool 2d - Float 32
     {
         struct ggml_context * ctx = make_ctx();
         struct ggml_tensor * t = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 10, 10, 2);
@@ -143,5 +235,39 @@ int main(int argc, const char** argv) {
         ggml_free(ctx);
     }

+    // max pool 2d - Float 16
+    {
+        struct ggml_context * ctx = make_ctx();
+        struct ggml_tensor * t = ggml_new_tensor_3d(ctx, GGML_TYPE_F16, 10, 10, 2);
+        memcpy(t->data, buf_f16, ggml_nbytes(t));
+
+        struct ggml_tensor * t_pooled = ggml_pool_2d(ctx, t, GGML_OP_POOL_MAX, 3, 4, 3, 4, 0, 0);
+        GGML_ASSERT(t_pooled->ne[0] == 3);
+        GGML_ASSERT(t_pooled->ne[1] == 2);
+        GGML_ASSERT(t_pooled->ne[2] == 2);
+        GGML_ASSERT(t_pooled->ne[3] == 1);
+
+        struct ggml_cgraph * graph = ggml_new_graph(ctx);
+        ggml_build_forward_expand(graph, t_pooled);
+
+        ggml_graph_compute_with_ctx(ctx, graph, 4);
+
+        const float * output = ggml_get_data_f32(t_pooled);
+        GGML_ASSERT(output[0] == 33);
+        GGML_ASSERT(output[1] == 36);
+        GGML_ASSERT(output[2] == 39);
+        GGML_ASSERT(output[3] == 73);
+        GGML_ASSERT(output[4] == 76);
+        GGML_ASSERT(output[5] == 79);
+        GGML_ASSERT(output[6] == 133);
+        GGML_ASSERT(output[7] == 136);
+        GGML_ASSERT(output[8] == 139);
+        GGML_ASSERT(output[9] == 173);
+        GGML_ASSERT(output[10] == 176);
+        GGML_ASSERT(output[11] == 179);
+
+        ggml_free(ctx);
+    }
+
     return 0;
 }
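
The core of the change is the per-element load: both pooling kernels now walk the source row through a const void pointer and widen each element to F32 before accumulating, branching on src->type once per element. If that duplicated conditional ever needs to cover more source types, one possible cleanup is to hoist it into a small helper. The sketch below is not part of this commit and the helper name is hypothetical; it uses only the macros and types the patch itself relies on (GGML_FP16_TO_FP32, ggml_fp16_t):

    // Hypothetical helper (not in the patch): fetch element j of a source row
    // that is either F32 or F16 and return it widened to F32, so both pooling
    // kernels could share one dispatch point instead of duplicating the ternary.
    static inline float pool_srow_to_f32(const void * srow, enum ggml_type type, int j) {
        return type == GGML_TYPE_F32
            ? ((const float *) srow)[j]
            : GGML_FP16_TO_FP32(((const ggml_fp16_t *) srow)[j]);
    }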
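
For exercising the new F16 path outside the test suite, here is a minimal standalone driver assembled from the same public calls the updated tests use (ggml_pool_1d, ggml_new_graph, ggml_graph_compute_with_ctx, ggml_get_data_f32). It is a sketch, not a reference program: the context size is an arbitrary assumption, and error handling is omitted.

    // Minimal sketch: average-pool an F16 tensor end to end through the public API.
    #include <stdio.h>
    #include "ggml.h"

    int main(void) {
        struct ggml_init_params params = {
            /*.mem_size   =*/ 16*1024*1024,  // assumed ample for this tiny graph
            /*.mem_buffer =*/ NULL,
            /*.no_alloc   =*/ false,
        };
        struct ggml_context * ctx = ggml_init(params);

        // One row of 6 values, 1..6, stored as F16.
        struct ggml_tensor * t = ggml_new_tensor_1d(ctx, GGML_TYPE_F16, 6);
        ggml_fp16_t * data = (ggml_fp16_t *) t->data;
        for (int i = 0; i < 6; ++i) {
            data[i] = ggml_fp32_to_fp16((float)(i + 1));
        }

        // k0 = 3, s0 = 3, p0 = 0: output is [avg(1,2,3), avg(4,5,6)] = [2, 5].
        struct ggml_tensor * pooled = ggml_pool_1d(ctx, t, GGML_OP_POOL_AVG, 3, 3, 0);

        struct ggml_cgraph * graph = ggml_new_graph(ctx);
        ggml_build_forward_expand(graph, pooled);
        ggml_graph_compute_with_ctx(ctx, graph, 1);

        // As in the tests above, the pooled destination is F32 even for F16 input.
        const float * out = ggml_get_data_f32(pooled);
        printf("%.1f %.1f\n", out[0], out[1]);  // expect: 2.0 5.0

        ggml_free(ctx);
        return 0;
    }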