const struct ggml_tensor * src = dst->src[0];
- assert(src->type == GGML_TYPE_F32);
+ assert(src->type == GGML_TYPE_F32 || src->type == GGML_TYPE_F16);
if (params->ith != 0) {
return;
const int64_t rs = dst->ne[0];
while (cdata < data_end) {
- const float * const srow = (const float *)cdata;
-
+ const void * srow = (const void *)cdata;
int j = 0;
-
for (int64_t i = 0; i < rs; ++i) {
switch (op) {
case GGML_OP_POOL_AVG: drow[i] = 0; break;
case GGML_OP_POOL_COUNT: GGML_ASSERT(false); break;
}
for (int ki = 0; ki < k; ++ki) {
+ const float srow_j = (src->type == GGML_TYPE_F32) ? ((const float*)srow)[j] : GGML_FP16_TO_FP32(((const ggml_fp16_t*)srow)[j]);
switch (op) {
- case GGML_OP_POOL_AVG: drow[i] += srow[j]; break;
- case GGML_OP_POOL_MAX: if (srow[j] > drow[i]) drow[i] = srow[j]; break;
- case GGML_OP_POOL_COUNT: GGML_ASSERT(false); break;
+ case GGML_OP_POOL_AVG: drow[i] += srow_j; break;
+ case GGML_OP_POOL_MAX: if (srow_j > drow[i]) drow[i] = srow_j; break;
+ case GGML_OP_POOL_COUNT: GGML_ASSERT(false); break;
}
++j;
}
const struct ggml_tensor * src = dst->src[0];
- GGML_ASSERT(src->type == GGML_TYPE_F32);
+ assert(src->type == GGML_TYPE_F32 || src->type == GGML_TYPE_F16);
if (params->ith != 0) {
return;
for (int ky = 0; ky < k1; ++ky) {
if (iy + ky < 0 || iy + ky >= src->ne[1]) continue;
- const float * const srow = (const float *)(cdata + src->nb[1] * (iy + ky));
+ const void * srow = (const void *)(cdata + src->nb[1] * (iy + ky));
for (int kx = 0; kx < k0; ++kx) {
int j = ix + kx;
if (j < 0 || j >= src->ne[0]) continue;
+ const float srow_j = (src->type == GGML_TYPE_F32) ? ((const float*)srow)[j] : GGML_FP16_TO_FP32(((const ggml_fp16_t*)srow)[j]);
switch (op) {
- case GGML_OP_POOL_AVG: *out += srow[j]; break;
- case GGML_OP_POOL_MAX: if (srow[j] > *out) *out = srow[j]; break;
- case GGML_OP_POOL_COUNT: GGML_ASSERT(false); break;
+ case GGML_OP_POOL_AVG: *out += srow_j; break;
+ case GGML_OP_POOL_MAX: if (srow_j > *out) *out = srow_j; break;
+ case GGML_OP_POOL_COUNT: GGML_ASSERT(false); break;
}
}
}
// Test driver for ggml 1D/2D pooling (AVG and MAX), extended to cover F16 inputs.
// NOTE(review): this span is unified-diff content ('+' = added, '-' = removed lines);
// the pre-existing F32 test bodies are elided by the diff context, so each F32 block
// below appears truncated (tensor creation followed immediately by ggml_free).
int main(int argc, const char** argv) {
// Source data: values 1..1024. Every integer up to 2048 is exactly representable
// in IEEE 754 half precision, so the F16 tests can assert exact float equality.
float buf_f32[1024];
+ ggml_fp16_t buf_f16[1024];
for (int i = 0; i < 1024; ++i) {
buf_f32[i] = (float)(i + 1);
+ buf_f16[i] = ggml_fp32_to_fp16(buf_f32[i]);
}
- // avg pool 1d
+ // avg pool 1d - Float 32
{
struct ggml_context * ctx = make_ctx();
struct ggml_tensor * t = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 10, 2);
ggml_free(ctx);
}
- // max pool 1d
+ // avg pool 1d - Float 16
// Mirrors the F32 avg-pool-1d case with an F16 source tensor.
// k=3, stride=3 over rows of 10 -> 3 outputs per row; row 0 holds 1..10,
// row 1 holds 11..20, e.g. avg(1,2,3)=2 and avg(11,12,13)=12.
+ {
+ struct ggml_context * ctx = make_ctx();
+ struct ggml_tensor * t = ggml_new_tensor_2d(ctx, GGML_TYPE_F16, 10, 2);
+ memcpy(t->data, buf_f16, ggml_nbytes(t));
+
+ struct ggml_tensor * t_pooled = ggml_pool_1d(ctx, t, GGML_OP_POOL_AVG, 3, 3, 0);
+ GGML_ASSERT(t_pooled->ne[0] == 3);
+ GGML_ASSERT(t_pooled->ne[1] == 2);
+ GGML_ASSERT(t_pooled->ne[2] == 1);
+
+ struct ggml_cgraph * graph = ggml_new_graph(ctx);
+ ggml_build_forward_expand(graph, t_pooled);
+
+ ggml_graph_compute_with_ctx(ctx, graph, 4);
+
+ const float * output = ggml_get_data_f32(t_pooled);
+
+ GGML_ASSERT(output[0] == 2);
+ GGML_ASSERT(output[1] == 5);
+ GGML_ASSERT(output[2] == 8);
+ GGML_ASSERT(output[3] == 12);
+ GGML_ASSERT(output[4] == 15);
+ GGML_ASSERT(output[5] == 18);
+
+ ggml_free(ctx);
+ }
+
+ // max pool 1d - Float 32
{
struct ggml_context * ctx = make_ctx();
struct ggml_tensor * t = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 10, 2);
ggml_free(ctx);
}
- // avg pool 2d
+ // max pool 1d - Float 16
// Same geometry as the avg case; each window's maximum is its last element:
// max(1,2,3)=3, ..., and for row 1 (values 11..20) max(11,12,13)=13, etc.
+ {
+ struct ggml_context * ctx = make_ctx();
+ struct ggml_tensor * t = ggml_new_tensor_2d(ctx, GGML_TYPE_F16, 10, 2);
+ memcpy(t->data, buf_f16, ggml_nbytes(t));
+
+ struct ggml_tensor * t_pooled = ggml_pool_1d(ctx, t, GGML_OP_POOL_MAX, 3, 3, 0);
+ GGML_ASSERT(t_pooled->ne[0] == 3);
+ GGML_ASSERT(t_pooled->ne[1] == 2);
+ GGML_ASSERT(t_pooled->ne[2] == 1);
+
+ struct ggml_cgraph * graph = ggml_new_graph(ctx);
+ ggml_build_forward_expand(graph, t_pooled);
+
+ ggml_graph_compute_with_ctx(ctx, graph, 4);
+
+ const float * output = ggml_get_data_f32(t_pooled);
+ GGML_ASSERT(output[0] == 3);
+ GGML_ASSERT(output[1] == 6);
+ GGML_ASSERT(output[2] == 9);
+ GGML_ASSERT(output[3] == 13);
+ GGML_ASSERT(output[4] == 16);
+ GGML_ASSERT(output[5] == 19);
+
+ ggml_free(ctx);
+ }
+
+ // avg pool 2d - Float 32
{
struct ggml_context * ctx = make_ctx();
struct ggml_tensor * t = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 10, 10, 2);
ggml_free(ctx);
}
- // max pool 2d
+ // avg pool 2d - Float 16
// 3x4 (k0 x k1) windows, stride 3x4, over two 10x10 planes of 1..100 / 101..200.
// First window covers cols 0-2 of rows 0-3 (values 1..3, 11..13, 21..23, 31..33),
// sum 204 / 12 = 17; outputs advance by 3 across and by 40 down (4 source rows).
// All window means here are exact integers, so == comparison is safe.
+ {
+ struct ggml_context * ctx = make_ctx();
+ struct ggml_tensor * t = ggml_new_tensor_3d(ctx, GGML_TYPE_F16, 10, 10, 2);
+ memcpy(t->data, buf_f16, ggml_nbytes(t));
+
+ struct ggml_tensor * t_pooled = ggml_pool_2d(ctx, t, GGML_OP_POOL_AVG, 3, 4, 3, 4, 0, 0);
+ GGML_ASSERT(t_pooled->ne[0] == 3);
+ GGML_ASSERT(t_pooled->ne[1] == 2);
+ GGML_ASSERT(t_pooled->ne[2] == 2);
+ GGML_ASSERT(t_pooled->ne[3] == 1);
+
+ struct ggml_cgraph * graph = ggml_new_graph(ctx);
+ ggml_build_forward_expand(graph, t_pooled);
+
+ ggml_graph_compute_with_ctx(ctx, graph, 4);
+
+ const float * output = ggml_get_data_f32(t_pooled);
+ GGML_ASSERT(output[0] == 17);
+ GGML_ASSERT(output[1] == 20);
+ GGML_ASSERT(output[2] == 23);
+ GGML_ASSERT(output[3] == 57);
+ GGML_ASSERT(output[4] == 60);
+ GGML_ASSERT(output[5] == 63);
+ GGML_ASSERT(output[6] == 117);
+ GGML_ASSERT(output[7] == 120);
+ GGML_ASSERT(output[8] == 123);
+ GGML_ASSERT(output[9] == 157);
+ GGML_ASSERT(output[10] == 160);
+ GGML_ASSERT(output[11] == 163);
+
+
+ ggml_free(ctx);
+ }
+
+ // max pool 2d - Float 32
{
struct ggml_context * ctx = make_ctx();
struct ggml_tensor * t = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 10, 10, 2);
ggml_free(ctx);
}
+ // max pool 2d - Float 16
// Same windows as the avg case; each window's max is its bottom-right element:
// first window max = value at (col 2, row 3) = 33, then +3 across and +40 down.
+ {
+ struct ggml_context * ctx = make_ctx();
+ struct ggml_tensor * t = ggml_new_tensor_3d(ctx, GGML_TYPE_F16, 10, 10, 2);
+ memcpy(t->data, buf_f16, ggml_nbytes(t));
+
+ struct ggml_tensor * t_pooled = ggml_pool_2d(ctx, t, GGML_OP_POOL_MAX, 3, 4, 3, 4, 0, 0);
+ GGML_ASSERT(t_pooled->ne[0] == 3);
+ GGML_ASSERT(t_pooled->ne[1] == 2);
+ GGML_ASSERT(t_pooled->ne[2] == 2);
+ GGML_ASSERT(t_pooled->ne[3] == 1);
+
+ struct ggml_cgraph * graph = ggml_new_graph(ctx);
+ ggml_build_forward_expand(graph, t_pooled);
+
+ ggml_graph_compute_with_ctx(ctx, graph, 4);
+
+ const float * output = ggml_get_data_f32(t_pooled);
+ GGML_ASSERT(output[0] == 33);
+ GGML_ASSERT(output[1] == 36);
+ GGML_ASSERT(output[2] == 39);
+ GGML_ASSERT(output[3] == 73);
+ GGML_ASSERT(output[4] == 76);
+ GGML_ASSERT(output[5] == 79);
+ GGML_ASSERT(output[6] == 133);
+ GGML_ASSERT(output[7] == 136);
+ GGML_ASSERT(output[8] == 139);
+ GGML_ASSERT(output[9] == 173);
+ GGML_ASSERT(output[10] == 176);
+ GGML_ASSERT(output[11] == 179);
+
+ ggml_free(ctx);
+ }
+
return 0;
}