From: Acly
Date: Tue, 1 Jul 2025 07:11:00 +0000 (+0200)
Subject: ggml-cpu : "align corners" for bilinear upscale/downscale (#1285)
X-Git-Tag: upstream/0.0.2309~89
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=67ad436cb653ac1ef0986f9fb0c6191ec828d1ed;p=pkg%2Fggml%2Fsources%2Fggml

ggml-cpu : "align corners" for bilinear upscale/downscale (#1285)

* add "align corners" mode for bilinear upscale, and allow downscaling
* add ggml_interpolate, deprecate ggml_upscale_ext, pass in align-corners as bit-flag
* test-backend-ops: replace ggml_upscale_ext with ggml_interpolate, add test cases for downscale and align-corners
---
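The commit replaces ggml_upscale_ext with a single entry point that takes the target size of all four dimensions plus a mode word; align-corners behaviour is requested by OR-ing a flag into that word, and shrinking a dimension is now allowed. A minimal usage sketch (not part of the patch; the wrapper function and the 64x64 target size are invented for illustration):

    #include <ggml.h>

    // Resize the first two dims of img to 64x64 with bilinear filtering;
    // the flag makes the corner samples of input and output coincide.
    static struct ggml_tensor * resize_example(struct ggml_context * ctx, struct ggml_tensor * img) {
        const uint32_t mode = GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ALIGN_CORNERS;
        return ggml_interpolate(ctx, img, 64, 64, img->ne[2], img->ne[3], mode);
    }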
diff --git a/include/ggml.h b/include/ggml.h
index 9c4e2402..4231a668 100644
--- a/include/ggml.h
+++ b/include/ggml.h
@@ -1765,6 +1765,12 @@ extern "C" {
     enum ggml_scale_mode {
         GGML_SCALE_MODE_NEAREST  = 0,
         GGML_SCALE_MODE_BILINEAR = 1,
+
+        GGML_SCALE_MODE_COUNT
+    };
+
+    enum ggml_scale_flag {
+        GGML_SCALE_FLAG_ALIGN_CORNERS = (1 << 8)
     };
 
     // interpolate
@@ -1777,14 +1783,26 @@ extern "C" {
 
     // interpolate
     // interpolate scale to specified dimensions
-    GGML_API struct ggml_tensor * ggml_upscale_ext(
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_upscale_ext(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
             int ne0,
             int ne1,
             int ne2,
             int ne3,
-            enum ggml_scale_mode mode);
+            enum ggml_scale_mode mode),
+        "use ggml_interpolate instead");
+
+    // Up- or downsamples the input to the specified size.
+    // 2D scale modes (eg. bilinear) are applied to the first two dimensions.
+    GGML_API struct ggml_tensor * ggml_interpolate(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            int64_t ne0,
+            int64_t ne1,
+            int64_t ne2,
+            int64_t ne3,
+            uint32_t mode); // ggml_scale_mode [ | ggml_scale_flag...]
 
     // pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
     GGML_API struct ggml_tensor * ggml_pad(
diff --git a/src/ggml-cpu/ops.cpp b/src/ggml-cpu/ops.cpp
index eff4a53e..f6ebb4ea 100644
--- a/src/ggml-cpu/ops.cpp
+++ b/src/ggml-cpu/ops.cpp
@@ -6608,12 +6608,13 @@ static void ggml_compute_forward_upscale_f32(
 
     GGML_TENSOR_UNARY_OP_LOCALS
 
-    const float sf0 = (float)ne0/src0->ne[0];
-    const float sf1 = (float)ne1/src0->ne[1];
-    const float sf2 = (float)ne2/src0->ne[2];
-    const float sf3 = (float)ne3/src0->ne[3];
+    float sf0 = (float)ne0/src0->ne[0];
+    float sf1 = (float)ne1/src0->ne[1];
+    float sf2 = (float)ne2/src0->ne[2];
+    float sf3 = (float)ne3/src0->ne[3];
 
-    const ggml_scale_mode mode = (ggml_scale_mode) ggml_get_op_params_i32(dst, 0);
+    const int32_t mode_flags = ggml_get_op_params_i32(dst, 0);
+    const ggml_scale_mode mode = (ggml_scale_mode) (mode_flags & 0xFF);
 
     if (mode == GGML_SCALE_MODE_NEAREST) {
         for (int64_t i3 = 0; i3 < ne3; i3++) {
@@ -6634,8 +6635,12 @@ static void ggml_compute_forward_upscale_f32(
             }
         }
     } else if (mode == GGML_SCALE_MODE_BILINEAR) {
-        // setting a pixel offset of 0 would replicate the behavior of pytorch interpolate with align_corners=True
-        const float pixel_offset = 0.5f;
+        float pixel_offset = 0.5f;
+        if (mode_flags & GGML_SCALE_FLAG_ALIGN_CORNERS) {
+            pixel_offset = 0.0f;
+            sf0 = (float)(ne0 - 1) / (src0->ne[0] - 1);
+            sf1 = (float)(ne1 - 1) / (src0->ne[1] - 1);
+        }
 
         for (int64_t i3 = 0; i3 < ne3; i3++) {
             const int64_t i03 = i3 / sf3;
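The CPU change only swaps the scale factors and the pixel offset when the flag is set; the bilinear sampling loop itself is untouched. For intuition, the destination-to-source coordinate mapping selected by the flag can be sketched as follows (illustrative helper, not the actual kernel; src_coord is a name invented here):

    #include <stdbool.h>
    #include <stdint.h>

    // Map destination index i to a fractional source coordinate along one axis.
    static float src_coord(int64_t i, int64_t src_n, int64_t dst_n, bool align_corners) {
        if (align_corners && src_n > 1) {
            const float sf = (float)(dst_n - 1) / (src_n - 1); // e.g. 2 -> 4 gives sf = 3
            return (float)i / sf;                              // 0..dst_n-1 maps onto 0..src_n-1
        }
        const float sf = (float)dst_n / src_n;                 // e.g. 2 -> 4 gives sf = 2
        return ((float)i + 0.5f) / sf - 0.5f;                  // half-pixel centers (default)
    }

With align corners the first and last output samples land exactly on the first and last input pixels, which matches PyTorch's interpolate(..., align_corners=True); the default half-pixel convention matches align_corners=False.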
diff --git a/src/ggml.c b/src/ggml.c
index f8e7c595..354a8051 100644
--- a/src/ggml.c
+++ b/src/ggml.c
@@ -4247,24 +4247,21 @@ struct ggml_tensor * ggml_pool_2d_back(
     return result;
 }
 
-// ggml_upscale
+// ggml_upscale / ggml_interpolate
 
-static struct ggml_tensor * ggml_upscale_impl(
+static struct ggml_tensor * ggml_interpolate_impl(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
-        int ne0,
-        int ne1,
-        int ne2,
-        int ne3,
-        enum ggml_scale_mode mode) {
-    GGML_ASSERT(a->ne[0] <= ne0);
-    GGML_ASSERT(a->ne[1] <= ne1);
-    GGML_ASSERT(a->ne[2] <= ne2);
-    GGML_ASSERT(a->ne[3] <= ne3);
-
+        int64_t ne0,
+        int64_t ne1,
+        int64_t ne2,
+        int64_t ne3,
+        uint32_t mode) {
+    GGML_ASSERT((mode & 0xFF) < GGML_SCALE_MODE_COUNT);
+
     struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
 
-    ggml_set_op_params_i32(result, 0, mode);
+    ggml_set_op_params_i32(result, 0, (int32_t)mode);
 
     result->op = GGML_OP_UPSCALE;
     result->src[0] = a;
@@ -4277,7 +4274,8 @@ struct ggml_tensor * ggml_upscale(
         struct ggml_tensor * a,
         int scale_factor,
         enum ggml_scale_mode mode) {
-    return ggml_upscale_impl(ctx, a, a->ne[0] * scale_factor, a->ne[1] * scale_factor, a->ne[2], a->ne[3], mode);
+    GGML_ASSERT(scale_factor > 1);
+    return ggml_interpolate_impl(ctx, a, a->ne[0] * scale_factor, a->ne[1] * scale_factor, a->ne[2], a->ne[3], mode);
 }
 
 struct ggml_tensor * ggml_upscale_ext(
@@ -4288,7 +4286,18 @@ struct ggml_tensor * ggml_upscale_ext(
         int ne2,
         int ne3,
         enum ggml_scale_mode mode) {
-    return ggml_upscale_impl(ctx, a, ne0, ne1, ne2, ne3, mode);
+    return ggml_interpolate_impl(ctx, a, ne0, ne1, ne2, ne3, mode);
+}
+
+struct ggml_tensor * ggml_interpolate(
+        struct ggml_context * ctx,
+        struct ggml_tensor * a,
+        int64_t ne0,
+        int64_t ne1,
+        int64_t ne2,
+        int64_t ne3,
+        uint32_t mode) {
+    return ggml_interpolate_impl(ctx, a, ne0, ne1, ne2, ne3, mode);
 }
 
 // ggml_pad
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index e0d6eed5..701807fc 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -326,4 +326,13 @@ if (NOT GGML_BACKEND_DL)
     target_link_libraries(${TEST_TARGET} PRIVATE ggml)
     add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}>)
     set_property(TEST ${TEST_TARGET} PROPERTY ENVIRONMENT "LLVM_PROFILE_FILE=${TEST_TARGET}.profraw")
+
+    #
+    # test-interpolate
+
+    set(TEST_TARGET test-interpolate)
+    add_executable(${TEST_TARGET} ${TEST_TARGET}.cpp)
+    target_link_libraries(${TEST_TARGET} PRIVATE ggml)
+    add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}>)
+    set_property(TEST ${TEST_TARGET} PROPERTY ENVIRONMENT "LLVM_PROFILE_FILE=${TEST_TARGET}.profraw")
 endif()
diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp
index 772bee34..b64a7815 100644
--- a/tests/test-backend-ops.cpp
+++ b/tests/test-backend-ops.cpp
@@ -3066,28 +3066,28 @@ struct test_upscale : public test_case {
     }
 };
 
-// GGML_OP_UPSCALE (ext)
-struct test_upscale_ext : public test_case {
+// GGML_OP_UPSCALE (via ggml_interpolate)
+struct test_interpolate : public test_case {
     const ggml_type type;
     const std::array<int64_t, 4> ne;
     const std::array<int64_t, 4> ne_tgt;
-    const ggml_scale_mode mode = GGML_SCALE_MODE_NEAREST;
+    const uint32_t mode = GGML_SCALE_MODE_NEAREST;
 
     std::string vars() override {
         return VARS_TO_STR4(type, ne, ne_tgt, mode);
     }
 
-    test_upscale_ext(ggml_type type = GGML_TYPE_F32,
+    test_interpolate(ggml_type type = GGML_TYPE_F32,
             std::array<int64_t, 4> ne = {2, 5, 7, 11},
             std::array<int64_t, 4> ne_tgt = {5, 7, 11, 13},
-            ggml_scale_mode mode = GGML_SCALE_MODE_NEAREST)
+            uint32_t mode = GGML_SCALE_MODE_NEAREST)
         : type(type), ne(ne), ne_tgt(ne_tgt), mode(mode) {}
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
         ggml_set_name(a, "a");
 
-        ggml_tensor * out = ggml_upscale_ext(ctx, a, ne_tgt[0], ne_tgt[1],ne_tgt[2], ne_tgt[3], mode);
+        ggml_tensor * out = ggml_interpolate(ctx, a, ne_tgt[0], ne_tgt[1],ne_tgt[2], ne_tgt[3], mode);
         ggml_set_name(out, "out");
 
         return out;
@@ -4521,8 +4521,10 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
     for (ggml_scale_mode mode : {GGML_SCALE_MODE_NEAREST, GGML_SCALE_MODE_BILINEAR}) {
         test_cases.emplace_back(new test_upscale(GGML_TYPE_F32, {512, 512, 3, 2}, 2, mode));
         test_cases.emplace_back(new test_upscale(GGML_TYPE_F32, {512, 512, 3, 2}, 2, mode, true));
-        test_cases.emplace_back(new test_upscale_ext(GGML_TYPE_F32, {2, 5, 7, 11}, {5, 7, 11, 13}, mode));
+        test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {2, 5, 7, 11}, {5, 7, 11, 13}, mode));
+        test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {5, 7, 11, 13}, {2, 5, 7, 11}, mode));
     }
+    test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {2, 5, 7, 11}, {5, 7, 11, 13}, GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ALIGN_CORNERS));
 
     test_cases.emplace_back(new test_sum());
     test_cases.emplace_back(new test_sum_rows());
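The new test cases pass GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ALIGN_CORNERS as a single uint32_t: the low byte carries the ggml_scale_mode value (checked against GGML_SCALE_MODE_COUNT in ggml_interpolate_impl) and bit 8 and up carry ggml_scale_flag bits. A small decoding sketch in the same spirit as the CPU code (the helper name is invented here):

    #include <ggml.h>
    #include <stdbool.h>

    static void decode_scale_mode(uint32_t packed, enum ggml_scale_mode * mode, bool * align_corners) {
        *mode          = (enum ggml_scale_mode)(packed & 0xFF);          // low byte: scale mode
        *align_corners = (packed & GGML_SCALE_FLAG_ALIGN_CORNERS) != 0;  // bit 8: align-corners flag
    }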
"\033[32mPASSED\033[0m" : "\033[31mFAILED\033[0m"); + return passed; +} + +const float input_upscale[] = { + 0.0f, 1.0f, + 2.0f, 4.0f +}; + +const float expected_upscale_x2_nearest[] = { + 0.0f, 0.0f, 1.0f, 1.0f, + 0.0f, 0.0f, 1.0f, 1.0f, + 2.0f, 2.0f, 4.0f, 4.0f, + 2.0f, 2.0f, 4.0f, 4.0f +}; + +const float expected_upscale_x2_bilinear[] = { + 0.0f, 0.2500f, 0.7500f, 1.00f, + 0.5f, 0.8125f, 1.4375f, 1.75f, + 1.5f, 1.9375f, 2.8125f, 3.25f, + 2.0f, 2.5000f, 3.5000f, 4.00f +}; + +const float expected_upscale_x2_bilinear_align_corners[] = { + 0.0000f, 0.3333f, 0.6667f, 1.0000f, + 0.6667f, 1.1111f, 1.5556f, 2.0000f, + 1.3333f, 1.8889f, 2.4444f, 3.0000f, + 2.0000f, 2.6667f, 3.3333f, 4.0000f +}; + +const float expected_upscale_x1_5_bilinear_align_corners[] = { + 0.0f, 1.0f, + 1.0f, 2.5f, + 2.0f, 4.0f +}; + +const float input_downscale[] = { + 0.0f, -1.0f, -2.0f, 0.0f, + 1.0f, 2.0f , 4.0f , 4.0f, + 2.0f, 2.0f , 1.0f , 1.0f, + + 1.0f, 2.0f , 3.0f , 4.0f, + 2.0f, 2.0f , 2.0f , 2.0f, + -2.0f, 2.0f, -4.0f, 4.0f +}; + +const float expected_downscale_nearest[] = { + 0.0f, -2.0f, + + 1.0f, 3.0f +}; + +const float expected_downscale_bilinear[] = { + 0.1667f, -0.3750f, 0.7500f, + 1.7917f, 1.8750f, 1.7500f, + + 1.3750f, 2.3750f, 3.3750f, + -0.5000f, -0.2500f, 2.5000f +}; + +const float expected_downscale_bilinear_align_corners[] = { + 0.0f , -1.5f, 0.0f, + 2.0f , 1.5f, 1.0f, + + 1.0f , 2.5f, 4.0f, + -2.0f, -1.0f, 4.0f +}; + +int main() { + bool passed = true; + + passed &= test_interpolate("upscale_x2_nearest", + {2, 2, 1, 1}, input_upscale, + {4, 4, 1, 1}, expected_upscale_x2_nearest, + GGML_SCALE_MODE_NEAREST); + + passed &= test_interpolate("upscale_x2_bilinear", + {2, 2, 1, 1}, input_upscale, + {4, 4, 1, 1}, expected_upscale_x2_bilinear, + GGML_SCALE_MODE_BILINEAR); + + passed &= test_interpolate("upscale_x2_bilinear_align_corners", + {2, 2, 1, 1}, input_upscale, + {4, 4, 1, 1}, expected_upscale_x2_bilinear_align_corners, + GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ALIGN_CORNERS); + + passed &= test_interpolate("upscale_x1_5_bilinear_align_corners", + {2, 2, 1, 1}, input_upscale, + {2, 3, 1, 1}, expected_upscale_x1_5_bilinear_align_corners, + GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ALIGN_CORNERS); + + passed &= test_interpolate("downscale_nearest", + {4, 3, 2, 1}, input_downscale, + {2, 1, 2, 1}, expected_downscale_nearest, + GGML_SCALE_MODE_NEAREST); + + passed &= test_interpolate("downscale_bilinear", + {4, 3, 2, 1}, input_downscale, + {3, 2, 2, 1}, expected_downscale_bilinear, + GGML_SCALE_MODE_BILINEAR); + + passed &= test_interpolate("downscale_bilinear_align_corners", + {4, 3, 2, 1}, input_downscale, + {3, 2, 2, 1}, expected_downscale_bilinear_align_corners, + GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ALIGN_CORNERS); + + return passed ? 0 : 1; +} \ No newline at end of file