ggml-cpu : "align corners" for bilinear upscale/downscale (ggml/1285)

author Acly <redacted>

Tue, 1 Jul 2025 07:11:00 +0000 (09:11 +0200)

committer Georgi Gerganov <redacted>

Tue, 1 Jul 2025 14:54:53 +0000 (17:54 +0300)
author Acly <redacted>
Tue, 1 Jul 2025 07:11:00 +0000 (09:11 +0200)
committer Georgi Gerganov <redacted>
Tue, 1 Jul 2025 14:54:53 +0000 (17:54 +0300)
diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h

index 9c4e24023b5ad20c1d59d8686a7e03763bc3987c..4231a668eb0419c1723c2c200198ba4eabe762ea 100644 (file)
--- a/ggml/include/ggml.h
+++ b/ggml/include/ggml.h
@@ -1765,6 +1765,12 @@ extern "C" {
      enum ggml_scale_mode {
          GGML_SCALE_MODE_NEAREST  = 0,
          GGML_SCALE_MODE_BILINEAR = 1,
+
+        GGML_SCALE_MODE_COUNT
+    };
+
+    enum ggml_scale_flag {
+        GGML_SCALE_FLAG_ALIGN_CORNERS = (1 << 8)
      };
  
      // interpolate
@@ -1777,14 +1783,26 @@ extern "C" {
  
      // interpolate
      // interpolate scale to specified dimensions
-    GGML_API struct ggml_tensor * ggml_upscale_ext(
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_upscale_ext(
              struct ggml_context * ctx,
              struct ggml_tensor  * a,
              int                   ne0,
              int                   ne1,
              int                   ne2,
              int                   ne3,
-            enum ggml_scale_mode  mode);
+            enum ggml_scale_mode  mode),
+        "use ggml_interpolate instead");
+
+    // Up- or downsamples the input to the specified size.
+    // 2D scale modes (eg. bilinear) are applied to the first two dimensions.
+    GGML_API struct ggml_tensor * ggml_interpolate(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            int64_t               ne0,
+            int64_t               ne1,
+            int64_t               ne2,
+            int64_t               ne3,
+            uint32_t              mode); // ggml_scale_mode [ | ggml_scale_flag...]
  
      // pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
      GGML_API struct ggml_tensor * ggml_pad(
diff --git a/ggml/src/ggml-cpu/ops.cpp b/ggml/src/ggml-cpu/ops.cpp

index eff4a53e3442b4ffa0ec73f158f4f62091a6b6ca..f6ebb4ea7632875daf28be7f53778007761c470b 100644 (file)
--- a/ggml/src/ggml-cpu/ops.cpp
+++ b/ggml/src/ggml-cpu/ops.cpp
@@ -6608,12 +6608,13 @@ static void ggml_compute_forward_upscale_f32(
  
      GGML_TENSOR_UNARY_OP_LOCALS
  
-    const float sf0 = (float)ne0/src0->ne[0];
-    const float sf1 = (float)ne1/src0->ne[1];
-    const float sf2 = (float)ne2/src0->ne[2];
-    const float sf3 = (float)ne3/src0->ne[3];
+    float sf0 = (float)ne0/src0->ne[0];
+    float sf1 = (float)ne1/src0->ne[1];
+    float sf2 = (float)ne2/src0->ne[2];
+    float sf3 = (float)ne3/src0->ne[3];
  
-    const ggml_scale_mode mode = (ggml_scale_mode) ggml_get_op_params_i32(dst, 0);
+    const int32_t mode_flags = ggml_get_op_params_i32(dst, 0);
+    const ggml_scale_mode mode = (ggml_scale_mode) (mode_flags & 0xFF);
  
      if (mode == GGML_SCALE_MODE_NEAREST) {
          for (int64_t i3 = 0; i3 < ne3; i3++) {
@@ -6634,8 +6635,12 @@ static void ggml_compute_forward_upscale_f32(
              }
          }
      } else if (mode == GGML_SCALE_MODE_BILINEAR) {
-        // setting a pixel offset of 0 would replicate the behavior of pytorch interpolate with align_corners=True
-        const float pixel_offset = 0.5f;
+        float pixel_offset = 0.5f;
+        if (mode_flags & GGML_SCALE_FLAG_ALIGN_CORNERS) {
+            pixel_offset = 0.0f;
+            sf0 = (float)(ne0 - 1) / (src0->ne[0] - 1);
+            sf1 = (float)(ne1 - 1) / (src0->ne[1] - 1);
+        }
  
          for (int64_t i3 = 0; i3 < ne3; i3++) {
              const int64_t i03 = i3 / sf3;
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c

index f8e7c595bce15dd214c8ef366bdd56087e3eff78..354a805179c7fae5d9b5f71f1571329be807540a 100644 (file)
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -4247,24 +4247,21 @@ struct ggml_tensor * ggml_pool_2d_back(
      return result;
  }
  
-// ggml_upscale
+// ggml_upscale / ggml_interpolate
  
-static struct ggml_tensor * ggml_upscale_impl(
+static struct ggml_tensor * ggml_interpolate_impl(
          struct ggml_context * ctx,
          struct ggml_tensor  * a,
-        int                   ne0,
-        int                   ne1,
-        int                   ne2,
-        int                   ne3,
-        enum ggml_scale_mode  mode) {
-    GGML_ASSERT(a->ne[0] <= ne0);
-    GGML_ASSERT(a->ne[1] <= ne1);
-    GGML_ASSERT(a->ne[2] <= ne2);
-    GGML_ASSERT(a->ne[3] <= ne3);
-
+        int64_t               ne0,
+        int64_t               ne1,
+        int64_t               ne2,
+        int64_t               ne3,
+        uint32_t              mode) {
+    GGML_ASSERT((mode & 0xFF) < GGML_SCALE_MODE_COUNT);
+    
      struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
  
-    ggml_set_op_params_i32(result, 0, mode);
+    ggml_set_op_params_i32(result, 0, (int32_t)mode);
  
      result->op     = GGML_OP_UPSCALE;
      result->src[0] = a;
@@ -4277,7 +4274,8 @@ struct ggml_tensor * ggml_upscale(
          struct ggml_tensor  * a,
          int                   scale_factor,
          enum ggml_scale_mode  mode) {
-    return ggml_upscale_impl(ctx, a, a->ne[0] * scale_factor, a->ne[1] * scale_factor, a->ne[2], a->ne[3], mode);
+    GGML_ASSERT(scale_factor > 1);
+    return ggml_interpolate_impl(ctx, a, a->ne[0] * scale_factor, a->ne[1] * scale_factor, a->ne[2], a->ne[3], mode);
  }
  
  struct ggml_tensor * ggml_upscale_ext(
@@ -4288,7 +4286,18 @@ struct ggml_tensor * ggml_upscale_ext(
          int                   ne2,
          int                   ne3,
          enum ggml_scale_mode  mode) {
-    return ggml_upscale_impl(ctx, a, ne0, ne1, ne2, ne3, mode);
+    return ggml_interpolate_impl(ctx, a, ne0, ne1, ne2, ne3, mode);
+}
+
+struct ggml_tensor * ggml_interpolate(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        int64_t               ne0,
+        int64_t               ne1,
+        int64_t               ne2,
+        int64_t               ne3,
+        uint32_t              mode) {
+    return ggml_interpolate_impl(ctx, a, ne0, ne1, ne2, ne3, mode);
  }
  
  // ggml_pad
author	Acly <redacted>
	Tue, 1 Jul 2025 07:11:00 +0000 (09:11 +0200)
committer	Georgi Gerganov <redacted>
	Tue, 1 Jul 2025 14:54:53 +0000 (17:54 +0300)
ggml/include/ggml.h		patch \| blob \| history
ggml/src/ggml-cpu/ops.cpp		patch \| blob \| history
ggml/src/ggml.c		patch \| blob \| history