float p0,
float p1);
- // nearest interpolate
+ enum ggml_scale_mode {
+ GGML_SCALE_MODE_NEAREST = 0,
+ GGML_SCALE_MODE_BILINEAR = 1,
+ };
+
+ // interpolate
// multiplies ne0 and ne1 by scale factor
- // used in stable-diffusion
GGML_API struct ggml_tensor * ggml_upscale(
struct ggml_context * ctx,
struct ggml_tensor * a,
- int scale_factor);
+ int scale_factor,
+ enum ggml_scale_mode mode);
- // nearest interpolate
- // nearest interpolate to specified dimensions
- // used in tortoise.cpp
+ // interpolate
+ // interpolate to the specified dimensions
GGML_API struct ggml_tensor * ggml_upscale_ext(
struct ggml_context * ctx,
struct ggml_tensor * a,
int ne0,
int ne1,
int ne2,
- int ne3);
+ int ne3,
+ enum ggml_scale_mode mode);
// pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
GGML_API struct ggml_tensor * ggml_pad(
if (op->src[0]->ne[2] * op->ne[3] != op->src[0]->ne[3] * op->ne[2]) {
return false;
}
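+ // this backend only implements nearest-neighbour upscaling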
+ if (op->op_params[0] != GGML_SCALE_MODE_NEAREST) {
+ return false;
+ }
return true;
}
case GGML_OP_POOL_2D: {
const float sf2 = (float)ne2/src0->ne[2];
const float sf3 = (float)ne3/src0->ne[3];
- // TODO: optimize
-
- for (int64_t i3 = 0; i3 < ne3; i3++) {
- const int64_t i03 = i3 / sf3;
- for (int64_t i2 = ith; i2 < ne2; i2 += nth) {
- const int64_t i02 = i2 / sf2;
- for (int64_t i1 = 0; i1 < ne1; i1++) {
- const int64_t i01 = i1 / sf1;
- for (int64_t i0 = 0; i0 < ne0; i0++) {
- const int64_t i00 = i0 / sf0;
-
- const float * x = (float *)((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03);
- float * y = (float *)((char *) dst->data + i0*nb0 + i1*nb1 + i2*nb2 + i3*nb3);
-
- *y = *x;
+ const ggml_scale_mode mode = (ggml_scale_mode) ggml_get_op_params_i32(dst, 0);
+
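+ // nearest: copy the single closest source element; bilinear: blend the four closest source elements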
+ if (mode == GGML_SCALE_MODE_NEAREST) {
+ for (int64_t i3 = 0; i3 < ne3; i3++) {
+ const int64_t i03 = i3 / sf3;
+ for (int64_t i2 = ith; i2 < ne2; i2 += nth) {
+ const int64_t i02 = i2 / sf2;
+ for (int64_t i1 = 0; i1 < ne1; i1++) {
+ const int64_t i01 = i1 / sf1;
+ for (int64_t i0 = 0; i0 < ne0; i0++) {
+ const int64_t i00 = i0 / sf0;
+
+ const float * x = (float *)((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03);
+ float * y = (float *)((char *) dst->data + i0*nb0 + i1*nb1 + i2*nb2 + i3*nb3);
+
+ *y = *x;
+ }
+ }
+ }
+ }
+ } else if (mode == GGML_SCALE_MODE_BILINEAR) {
+ // setting a pixel offset of 0 would replicate the behavior of pytorch interpolate with align_corners=True
+ const float pixel_offset = 0.5f;
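+ // with 0.5 the source is sampled at pixel centers, matching pytorch interpolate with align_corners=False (the default)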
+
+ for (int64_t i3 = 0; i3 < ne3; i3++) {
+ const int64_t i03 = i3 / sf3;
+ for (int64_t i2 = ith; i2 < ne2; i2 += nth) {
+ const int64_t i02 = i2 / sf2;
+ for (int64_t i1 = 0; i1 < ne1; i1++) {
+ const float y = ((float)i1 + pixel_offset) / sf1 - pixel_offset;
+ int64_t y0 = (int64_t)floorf(y);
+ int64_t y1 = y0 + 1;
+
+ y0 = std::max(int64_t(0), std::min(y0, ne01 - 1));
+ y1 = std::max(int64_t(0), std::min(y1, ne01 - 1));
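+ // clamp to the valid source range so border pixels are replicated at the edges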
+
+ float dy = y - (float)y0;
+ dy = std::max(0.0f, std::min(dy, 1.0f));
+
+ for (int64_t i0 = 0; i0 < ne0; i0++) {
+ const float x = ((float)i0 + pixel_offset) / sf0 - pixel_offset;
+ int64_t x0 = (int64_t)floorf(x);
+ int64_t x1 = x0 + 1;
+
+ x0 = std::max(int64_t(0), std::min(x0, ne00 - 1));
+ x1 = std::max(int64_t(0), std::min(x1, ne00 - 1));
+
+ float dx = x - (float)x0;
+ dx = std::max(0.0f, std::min(dx, 1.0f));
+
+ // fetch the four surrounding pixel values and interpolate
+ const float a = *(const float *)((const char *)src0->data + x0*nb00 + y0*nb01 + i02*nb02 + i03*nb03);
+ const float b = *(const float *)((const char *)src0->data + x1*nb00 + y0*nb01 + i02*nb02 + i03*nb03);
+ const float c = *(const float *)((const char *)src0->data + x0*nb00 + y1*nb01 + i02*nb02 + i03*nb03);
+ const float d = *(const float *)((const char *)src0->data + x1*nb00 + y1*nb01 + i02*nb02 + i03*nb03);
+
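+ // the four weights (1 - dx)*(1 - dy), dx*(1 - dy), (1 - dx)*dy and dx*dy sum to 1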
+ const float val = a*(1 - dx)*(1 - dy) + b*dx*(1 - dy) + c*(1 - dx)*dy + d*dx*dy;
+
+ float * y_dst = (float *)((char *)dst->data + i0*nb0 + i1*nb1 + i2*nb2 + i3*nb3);
+ *y_dst = val;
+ }
}
}
}
+ } else {
+ GGML_ABORT("unsupported upscale mode");
}
}
case GGML_OP_GROUP_NORM:
return ggml_is_contiguous(op->src[0]);
case GGML_OP_UPSCALE:
+ return op->src[0]->type == GGML_TYPE_F32 && op->op_params[0] == GGML_SCALE_MODE_NEAREST;
case GGML_OP_PAD:
case GGML_OP_ARANGE:
case GGML_OP_TIMESTEP_EMBEDDING:
return op->src[0]->type == GGML_TYPE_F16;
case GGML_OP_POOL_1D:
return false;
- case GGML_OP_POOL_2D:
case GGML_OP_UPSCALE:
+ return op->src[0]->type == GGML_TYPE_F32 && op->op_params[0] == GGML_SCALE_MODE_NEAREST;
+ case GGML_OP_POOL_2D:
case GGML_OP_PAD:
case GGML_OP_PAD_REFLECT_1D:
case GGML_OP_TIMESTEP_EMBEDDING:
case GGML_OP_IM2COL:
// TODO: add support for the new F32 operations
return op->src[0]->type == GGML_TYPE_F16;
+ case GGML_OP_UPSCALE:
+ return op->src[0]->type == GGML_TYPE_F32 && op->op_params[0] == GGML_SCALE_MODE_NEAREST;
case GGML_OP_POOL_2D:
case GGML_OP_SUM:
case GGML_OP_SUM_ROWS:
case GGML_OP_ARGSORT:
case GGML_OP_ACC:
- case GGML_OP_UPSCALE:
case GGML_OP_PAD:
case GGML_OP_LEAKY_RELU:
case GGML_OP_TIMESTEP_EMBEDDING:
}
return nullptr;
case GGML_OP_UPSCALE:
- if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) {
+ if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32 && dst->op_params[0] == GGML_SCALE_MODE_NEAREST) {
return ctx->device->pipeline_upscale_f32;
}
return nullptr;
case GGML_OP_COS:
case GGML_OP_CLAMP:
return op->src[0]->type == GGML_TYPE_F32;
+ case GGML_OP_UPSCALE:
+ return op->op_params[0] == GGML_SCALE_MODE_NEAREST;
case GGML_OP_ACC:
case GGML_OP_CONCAT:
- case GGML_OP_UPSCALE:
case GGML_OP_SCALE:
case GGML_OP_PAD:
case GGML_OP_DIAG_MASK_INF:
} else if (tensor->op == GGML_OP_CONCAT) {
tensor_clone = ggml_concat(ggml_ctx, src_clone[0], src_clone[1], *(int *)tensor->op_params);
} else if (tensor->op == GGML_OP_UPSCALE) {
- tensor_clone = ggml_upscale_ext(ggml_ctx, src_clone[0], tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3]);
+ tensor_clone = ggml_upscale_ext(ggml_ctx, src_clone[0], tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3], (ggml_scale_mode) tensor->op_params[0]);
} else if (tensor->op == GGML_OP_SCALE) {
const float * params = (const float *)tensor->op_params;
tensor_clone = ggml_scale(ggml_ctx, src_clone[0], params[0]);
int ne0,
int ne1,
int ne2,
- int ne3) {
+ int ne3,
+ enum ggml_scale_mode mode) {
GGML_ASSERT(a->ne[0] <= ne0);
GGML_ASSERT(a->ne[1] <= ne1);
GGML_ASSERT(a->ne[2] <= ne2);
struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
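+ // store the interpolation mode in op_params[0] so backends can read it when executing or filtering the op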
+ ggml_set_op_params_i32(result, 0, mode);
+
result->op = GGML_OP_UPSCALE;
result->src[0] = a;
struct ggml_tensor * ggml_upscale(
struct ggml_context * ctx,
struct ggml_tensor * a,
- int scale_factor) {
- return ggml_upscale_impl(ctx, a, a->ne[0] * scale_factor, a->ne[1] * scale_factor, a->ne[2], a->ne[3]);
+ int scale_factor,
+ enum ggml_scale_mode mode) {
+ return ggml_upscale_impl(ctx, a, a->ne[0] * scale_factor, a->ne[1] * scale_factor, a->ne[2], a->ne[3], mode);
}
struct ggml_tensor * ggml_upscale_ext(
int ne0,
int ne1,
int ne2,
- int ne3) {
- return ggml_upscale_impl(ctx, a, ne0, ne1, ne2, ne3);
+ int ne3,
+ enum ggml_scale_mode mode) {
+ return ggml_upscale_impl(ctx, a, ne0, ne1, ne2, ne3, mode);
}
// ggml_pad
}
}
+static std::string var_to_str(ggml_scale_mode mode) {
+ switch (mode) {
+ case GGML_SCALE_MODE_NEAREST: return "nearest";
+ case GGML_SCALE_MODE_BILINEAR: return "bilinear";
+ default: return std::to_string(mode);
+ }
+}
+
#define VAR_TO_STR(x) (#x "=" + var_to_str(x))
#define VARS_TO_STR1(a) VAR_TO_STR(a)
const std::array<int64_t, 4> ne;
const int32_t scale_factor;
const bool transpose;
+ const ggml_scale_mode mode;
std::string vars() override {
- return VARS_TO_STR4(type, ne, scale_factor, transpose);
+ return VARS_TO_STR5(type, ne, scale_factor, mode, transpose);
}
test_upscale(ggml_type type = GGML_TYPE_F32,
std::array<int64_t, 4> ne = {512, 512, 3, 1},
- int32_t scale_factor = 2, bool transpose = false)
- : type(type), ne(ne), scale_factor(scale_factor), transpose(transpose) {}
+ int32_t scale_factor = 2, ggml_scale_mode mode = GGML_SCALE_MODE_NEAREST, bool transpose = false)
+ : type(type), ne(ne), scale_factor(scale_factor), mode(mode), transpose(transpose) {}
ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
ggml_set_name(a, "a_transposed");
}
- ggml_tensor * out = ggml_upscale(ctx, a, scale_factor);
+ ggml_tensor * out = ggml_upscale(ctx, a, scale_factor, mode);
ggml_set_name(out, "out");
return out;
const ggml_type type;
const std::array<int64_t, 4> ne;
const std::array<int64_t, 4> ne_tgt;
+ const ggml_scale_mode mode = GGML_SCALE_MODE_NEAREST;
std::string vars() override {
- return VARS_TO_STR3(type, ne, ne_tgt);
+ return VARS_TO_STR4(type, ne, ne_tgt, mode);
}
test_upscale_ext(ggml_type type = GGML_TYPE_F32,
std::array<int64_t, 4> ne = {2, 5, 7, 11},
- std::array<int64_t, 4> ne_tgt = {5, 7, 11, 13})
- : type(type), ne(ne), ne_tgt(ne_tgt) {}
+ std::array<int64_t, 4> ne_tgt = {5, 7, 11, 13},
+ ggml_scale_mode mode = GGML_SCALE_MODE_NEAREST)
+ : type(type), ne(ne), ne_tgt(ne_tgt), mode(mode) {}
ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
ggml_set_name(a, "a");
- ggml_tensor * out = ggml_upscale_ext(ctx, a, ne_tgt[0], ne_tgt[1],ne_tgt[2], ne_tgt[3]);
+ ggml_tensor * out = ggml_upscale_ext(ctx, a, ne_tgt[0], ne_tgt[1], ne_tgt[2], ne_tgt[3], mode);
ggml_set_name(out, "out");
return out;
test_cases.emplace_back(new test_argsort(GGML_TYPE_F32, {60, 10, 10, 10}, order)); // qwen
}
+ for (ggml_scale_mode mode : {GGML_SCALE_MODE_NEAREST, GGML_SCALE_MODE_BILINEAR}) {
+ test_cases.emplace_back(new test_upscale(GGML_TYPE_F32, {512, 512, 3, 2}, 2, mode));
+ test_cases.emplace_back(new test_upscale(GGML_TYPE_F32, {512, 512, 3, 2}, 2, mode, true));
+ test_cases.emplace_back(new test_upscale_ext(GGML_TYPE_F32, {2, 5, 7, 11}, {5, 7, 11, 13}, mode));
+ }
+
test_cases.emplace_back(new test_sum());
test_cases.emplace_back(new test_sum_rows());
test_cases.emplace_back(new test_mean());
- test_cases.emplace_back(new test_upscale());
- test_cases.emplace_back(new test_upscale(GGML_TYPE_F32, { 512, 512, 3, 1 }, 2, true));
- test_cases.emplace_back(new test_upscale_ext());
test_cases.emplace_back(new test_group_norm(GGML_TYPE_F32, {64, 64, 320, 1}));
test_cases.emplace_back(new test_group_norm(GGML_TYPE_F32, {9, 9, 1280, 1}));
test_cases.emplace_back(new test_acc());