vulkan: Support UPSCALE w/antialias (llama/18327)

author Jeff Bolz <redacted>

Fri, 26 Dec 2025 16:00:57 +0000 (10:00 -0600)

committer Georgi Gerganov <redacted>

Wed, 31 Dec 2025 10:39:43 +0000 (12:39 +0200)
author Jeff Bolz <redacted>
Fri, 26 Dec 2025 16:00:57 +0000 (10:00 -0600)
committer Georgi Gerganov <redacted>
Wed, 31 Dec 2025 10:39:43 +0000 (12:39 +0200)
diff --git a/src/ggml-vulkan/ggml-vulkan.cpp b/src/ggml-vulkan/ggml-vulkan.cpp

index e7ce518fbac68eca67ca24881ccaab09b5730a81..7ae2e38356e4754c3baddb7e1865027291b9e2f9 100644 (file)
--- a/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/src/ggml-vulkan/ggml-vulkan.cpp
@@ -651,7 +651,7 @@ struct vk_device_struct {
      vk_pipeline pipeline_add_id_f32;
  
      vk_pipeline pipeline_concat_f32, pipeline_concat_f16, pipeline_concat_i32;
-    vk_pipeline pipeline_upscale_nearest_f32, pipeline_upscale_bilinear_f32, pipeline_upscale_bicubic_f32;
+    vk_pipeline pipeline_upscale_nearest_f32, pipeline_upscale_bilinear_f32, pipeline_upscale_bicubic_f32, pipeline_upscale_bilinear_antialias_f32;
      vk_pipeline pipeline_scale_f32;
      vk_pipeline pipeline_sqr_f32;
      vk_pipeline pipeline_sqrt_f32;
@@ -3956,6 +3956,7 @@ static void ggml_vk_load_shaders(vk_device& device) {
      ggml_vk_create_pipeline(device, device->pipeline_upscale_nearest_f32, "upscale_f32", upscale_f32_len, upscale_f32_data, "main", 2, sizeof(vk_op_upscale_push_constants), {512, 1, 1}, {GGML_SCALE_MODE_NEAREST}, 1);
      ggml_vk_create_pipeline(device, device->pipeline_upscale_bilinear_f32, "upscale_f32", upscale_f32_len, upscale_f32_data, "main", 2, sizeof(vk_op_upscale_push_constants), {512, 1, 1}, {GGML_SCALE_MODE_BILINEAR}, 1);
      ggml_vk_create_pipeline(device, device->pipeline_upscale_bicubic_f32, "upscale_f32", upscale_f32_len, upscale_f32_data, "main", 2, sizeof(vk_op_upscale_push_constants), {512, 1, 1}, {GGML_SCALE_MODE_BICUBIC}, 1);
+    ggml_vk_create_pipeline(device, device->pipeline_upscale_bilinear_antialias_f32, "upscale_f32", upscale_f32_len, upscale_f32_data, "main", 2, sizeof(vk_op_upscale_push_constants), {512, 1, 1}, {GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ANTIALIAS}, 1);
  
      ggml_vk_create_pipeline(device, device->pipeline_scale_f32, "scale_f32", scale_f32_len, scale_f32_data, "main", 2, sizeof(vk_op_unary_push_constants), {512, 1, 1}, {}, 1);
  
@@ -8433,7 +8434,7 @@ static vk_pipeline ggml_vk_op_get_pipeline(ggml_backend_vk_context * ctx, const
          return nullptr;
      case GGML_OP_UPSCALE:
          if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) {
-            ggml_scale_mode mode = (ggml_scale_mode)(ggml_get_op_params_i32(dst, 0) & 0xFF);
+            uint32_t mode = (ggml_get_op_params_i32(dst, 0) & (0xFF | GGML_SCALE_FLAG_ANTIALIAS));
              switch (mode) {
                  case GGML_SCALE_MODE_NEAREST:
                      return ctx->device->pipeline_upscale_nearest_f32;
@@ -8441,6 +8442,8 @@ static vk_pipeline ggml_vk_op_get_pipeline(ggml_backend_vk_context * ctx, const
                      return ctx->device->pipeline_upscale_bilinear_f32;
                  case GGML_SCALE_MODE_BICUBIC:
                      return ctx->device->pipeline_upscale_bicubic_f32;
+                case GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ANTIALIAS:
+                    return ctx->device->pipeline_upscale_bilinear_antialias_f32;
                  default:
                      return nullptr;
              }
@@ -14341,7 +14344,12 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm
              }
              return true;
          case GGML_OP_UPSCALE:
-            return op->src[0]->type == GGML_TYPE_F32 && !(op->op_params[0] & GGML_SCALE_FLAG_ANTIALIAS);
+            if (op->op_params[0] & GGML_SCALE_FLAG_ANTIALIAS) {
+                if ((op->op_params[0] & 0xFF) != GGML_SCALE_MODE_BILINEAR) {
+                    return false;
+                }
+            }
+            return op->src[0]->type == GGML_TYPE_F32;
          case GGML_OP_ACC:
              return op->src[0]->type == GGML_TYPE_F32;
          case GGML_OP_CONCAT:
diff --git a/src/ggml-vulkan/vulkan-shaders/upscale.comp b/src/ggml-vulkan/vulkan-shaders/upscale.comp

index 037ab0c78f0f96ff6fc04934a4b1fcb75c14f162..f7d12a8dda6ca74911398c777f076bcbb9fb7585 100644 (file)
--- a/src/ggml-vulkan/vulkan-shaders/upscale.comp
+++ b/src/ggml-vulkan/vulkan-shaders/upscale.comp
@@ -21,6 +21,7 @@ layout (binding = 1) writeonly buffer D {D_TYPE data_d[];};
  #define NEAREST  0
  #define BILINEAR 1
  #define BICUBIC  2
+#define BILINEAR_ANTIALIAS 513
  
  layout (constant_id = 0) const uint scale_mode = 0;
  
@@ -62,6 +63,56 @@ float interpolate_bilinear(uint i10, uint i11, uint i12, uint i13) {
      return fetch_bilinear(c0, c1, d, i12, i13);
  }
  
+float triangle_filter(float x) {
+    return max(1.0f - abs(x), 0.0f);
+}
+
+float interpolate_bilinear_antialias(uint i10, uint i11, uint i12, uint i13) {
+    const float support1  = max(1.0f, 1.0f / p.sf1);
+    const float invscale1 = 1.0f / support1;
+    const float support0  = max(1.0f, 1.0f / p.sf0);
+    const float invscale0 = 1.0f / support0;
+
+    const uint i02 = uint(i12 / p.sf2);
+    const uint i03 = uint(i13 / p.sf3);
+
+    const float y = (float(i11) + p.pixel_offset) / p.sf1;
+    const float x = (float(i10) + p.pixel_offset) / p.sf0;
+
+    // the range of source pixels that contribute
+    const int x_min = max(int(x - support0 + p.pixel_offset), 0);
+    const int x_max = min(int(x + support0 + p.pixel_offset), int(p.ne00));
+    const int y_min = max(int(y - support1 + p.pixel_offset), 0);
+    const int y_max = min(int(y + support1 + p.pixel_offset), int(p.ne01));
+
+    // bilinear filter with antialiasing
+    float val = 0.0f;
+    float total_weight = 0.0f;
+
+    for (int sy = y_min; sy < y_max; sy++) {
+        const float weight_y = triangle_filter((sy - y + p.pixel_offset) * invscale1);
+
+        for (int sx = x_min; sx < x_max; sx++) {
+            const float weight_x = triangle_filter((sx - x + p.pixel_offset) * invscale0);
+            const float weight = weight_x * weight_y;
+
+            if (weight <= 0.0f) {
+                continue;
+            }
+
+            const float pixel = data_a[p.a_offset + i03 * p.nb03 + i02 * p.nb02 + sy * p.nb01 + sx * p.nb00];
+            val += pixel * weight;
+            total_weight += weight;
+        }
+    }
+
+    if (total_weight > 0.0f) {
+        val /= total_weight;
+    }
+
+    return val;
+}
+
  // Bicubic interpolation with alpha = -0.75
  // https://en.wikipedia.org/wiki/Bicubic_interpolation#Bicubic_convolution_algorithm
  const vec4 bcoeffs1 = vec4( 1.25, -2.25,  0.0, 1.0);
@@ -118,6 +169,9 @@ void main() {
          case BICUBIC:
              result = interpolate_bicubic(i10, i11, i12, i13);
              break;
+        case BILINEAR_ANTIALIAS:
+            result = interpolate_bilinear_antialias(i10, i11, i12, i13);
+            break;
      }
  
      data_d[p.d_offset + idx] = D_TYPE(result);
diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp

index a801455f16fc21036056275c400e9bef99cbb022..0b981b1788397b903e03ad623cb306ed853e4d6b 100644 (file)
--- a/tests/test-backend-ops.cpp
+++ b/tests/test-backend-ops.cpp
@@ -402,12 +402,20 @@ static std::string var_to_str(ggml_op_pool pool) {
  }
  
  static std::string var_to_str(ggml_scale_mode mode) {
-    switch (mode) {
-        case GGML_SCALE_MODE_NEAREST:  return "nearest";
-        case GGML_SCALE_MODE_BILINEAR: return "bilinear";
-        case GGML_SCALE_MODE_BICUBIC:  return "bicubic";
-        default:                       return std::to_string(mode);
+    std::string str;
+    switch (mode & 0xFF) {
+        case GGML_SCALE_MODE_NEAREST:  str = "nearest"; break;
+        case GGML_SCALE_MODE_BILINEAR: str = "bilinear"; break;
+        case GGML_SCALE_MODE_BICUBIC:  str = "bicubic"; break;
+        default:                       str = std::to_string(mode); break;
      }
+    if (mode & GGML_SCALE_FLAG_ALIGN_CORNERS) {
+        str += "|align_corners";
+    }
+    if (mode & GGML_SCALE_FLAG_ANTIALIAS) {
+        str += "|antialias";
+    }
+    return str;
  }
  
  #define VAR_TO_STR(x) (#x "=" + var_to_str(x))
@@ -5535,18 +5543,16 @@ struct test_interpolate : public test_case {
      const ggml_type type;
      const std::array<int64_t, 4> ne;
      const std::array<int64_t, 4> ne_tgt;
-    const uint32_t mode = GGML_SCALE_MODE_NEAREST;
+    const ggml_scale_mode mode = GGML_SCALE_MODE_NEAREST;
  
      std::string vars() override {
-        ggml_scale_mode mode = (ggml_scale_mode)(this->mode & 0xFF);
-        std::string flags = (this->mode & GGML_SCALE_FLAG_ALIGN_CORNERS) ? "align_corners" : "none";
-        return VARS_TO_STR5(type, ne, ne_tgt, mode, flags);
+        return VARS_TO_STR4(type, ne, ne_tgt, mode);
      }
  
      test_interpolate(ggml_type type = GGML_TYPE_F32,
              std::array<int64_t, 4> ne     = {2, 5,  7, 11},
              std::array<int64_t, 4> ne_tgt = {5, 7, 11, 13},
-            uint32_t mode = GGML_SCALE_MODE_NEAREST)
+            ggml_scale_mode mode = GGML_SCALE_MODE_NEAREST)
          : type(type), ne(ne), ne_tgt(ne_tgt), mode(mode) {}
  
      ggml_tensor * build_graph(ggml_context * ctx) override {
@@ -7883,9 +7889,9 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
          test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {5, 7, 11, 13}, {2, 5,  7, 11}, mode));
      }
      for (ggml_scale_mode mode : {GGML_SCALE_MODE_BILINEAR, GGML_SCALE_MODE_BICUBIC}) {
-        test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {2, 5, 7, 11}, {5, 7, 11, 13}, mode | GGML_SCALE_FLAG_ALIGN_CORNERS));
-        test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {1, 4, 3, 2}, {2, 8, 3, 2}, mode | GGML_SCALE_FLAG_ALIGN_CORNERS));
-        test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {4, 1, 3, 2}, {1, 1, 3, 2}, mode | GGML_SCALE_FLAG_ALIGN_CORNERS));
+        test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {2, 5, 7, 11}, {5, 7, 11, 13}, (ggml_scale_mode)(mode | GGML_SCALE_FLAG_ALIGN_CORNERS)));
+        test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {1, 4, 3, 2}, {2, 8, 3, 2}, (ggml_scale_mode)(mode | GGML_SCALE_FLAG_ALIGN_CORNERS)));
+        test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {4, 1, 3, 2}, {1, 1, 3, 2}, (ggml_scale_mode)(mode | GGML_SCALE_FLAG_ALIGN_CORNERS)));
      }
  
      test_cases.emplace_back(new test_sum());
author	Jeff Bolz <redacted>
	Fri, 26 Dec 2025 16:00:57 +0000 (10:00 -0600)
committer	Georgi Gerganov <redacted>
	Wed, 31 Dec 2025 10:39:43 +0000 (12:39 +0200)
src/ggml-vulkan/ggml-vulkan.cpp		patch \| blob \| history
src/ggml-vulkan/vulkan-shaders/upscale.comp		patch \| blob \| history
tests/test-backend-ops.cpp		patch \| blob \| history