vk_pipeline pipeline_add_id_f32;
vk_pipeline pipeline_concat_f32, pipeline_concat_f16, pipeline_concat_i32;
- vk_pipeline pipeline_upscale_nearest_f32, pipeline_upscale_bilinear_f32, pipeline_upscale_bicubic_f32;
+ vk_pipeline pipeline_upscale_nearest_f32, pipeline_upscale_bilinear_f32, pipeline_upscale_bicubic_f32, pipeline_upscale_bilinear_antialias_f32;
vk_pipeline pipeline_scale_f32;
vk_pipeline pipeline_sqr_f32;
vk_pipeline pipeline_sqrt_f32;
ggml_vk_create_pipeline(device, device->pipeline_upscale_nearest_f32, "upscale_f32", upscale_f32_len, upscale_f32_data, "main", 2, sizeof(vk_op_upscale_push_constants), {512, 1, 1}, {GGML_SCALE_MODE_NEAREST}, 1);
ggml_vk_create_pipeline(device, device->pipeline_upscale_bilinear_f32, "upscale_f32", upscale_f32_len, upscale_f32_data, "main", 2, sizeof(vk_op_upscale_push_constants), {512, 1, 1}, {GGML_SCALE_MODE_BILINEAR}, 1);
ggml_vk_create_pipeline(device, device->pipeline_upscale_bicubic_f32, "upscale_f32", upscale_f32_len, upscale_f32_data, "main", 2, sizeof(vk_op_upscale_push_constants), {512, 1, 1}, {GGML_SCALE_MODE_BICUBIC}, 1);
+ ggml_vk_create_pipeline(device, device->pipeline_upscale_bilinear_antialias_f32, "upscale_f32", upscale_f32_len, upscale_f32_data, "main", 2, sizeof(vk_op_upscale_push_constants), {512, 1, 1}, {GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ANTIALIAS}, 1);
ggml_vk_create_pipeline(device, device->pipeline_scale_f32, "scale_f32", scale_f32_len, scale_f32_data, "main", 2, sizeof(vk_op_unary_push_constants), {512, 1, 1}, {}, 1);
return nullptr;
case GGML_OP_UPSCALE:
if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) {
- ggml_scale_mode mode = (ggml_scale_mode)(ggml_get_op_params_i32(dst, 0) & 0xFF);
+ uint32_t mode = (ggml_get_op_params_i32(dst, 0) & (0xFF | GGML_SCALE_FLAG_ANTIALIAS));
switch (mode) {
case GGML_SCALE_MODE_NEAREST:
return ctx->device->pipeline_upscale_nearest_f32;
return ctx->device->pipeline_upscale_bilinear_f32;
case GGML_SCALE_MODE_BICUBIC:
return ctx->device->pipeline_upscale_bicubic_f32;
+ case GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ANTIALIAS:
+ return ctx->device->pipeline_upscale_bilinear_antialias_f32;
default:
return nullptr;
}
}
return true;
case GGML_OP_UPSCALE:
- return op->src[0]->type == GGML_TYPE_F32 && !(op->op_params[0] & GGML_SCALE_FLAG_ANTIALIAS);
+ if (op->op_params[0] & GGML_SCALE_FLAG_ANTIALIAS) {
+ if ((op->op_params[0] & 0xFF) != GGML_SCALE_MODE_BILINEAR) {
+ return false;
+ }
+ }
+ return op->src[0]->type == GGML_TYPE_F32;
case GGML_OP_ACC:
return op->src[0]->type == GGML_TYPE_F32;
case GGML_OP_CONCAT:
#define NEAREST 0
#define BILINEAR 1
#define BICUBIC 2
+#define BILINEAR_ANTIALIAS 513
layout (constant_id = 0) const uint scale_mode = 0;
return fetch_bilinear(c0, c1, d, i12, i13);
}
+float triangle_filter(float x) {
+ return max(1.0f - abs(x), 0.0f);
+}
+
+float interpolate_bilinear_antialias(uint i10, uint i11, uint i12, uint i13) {
+ const float support1 = max(1.0f, 1.0f / p.sf1);
+ const float invscale1 = 1.0f / support1;
+ const float support0 = max(1.0f, 1.0f / p.sf0);
+ const float invscale0 = 1.0f / support0;
+
+ const uint i02 = uint(i12 / p.sf2);
+ const uint i03 = uint(i13 / p.sf3);
+
+ const float y = (float(i11) + p.pixel_offset) / p.sf1;
+ const float x = (float(i10) + p.pixel_offset) / p.sf0;
+
+ // the range of source pixels that contribute
+ const int x_min = max(int(x - support0 + p.pixel_offset), 0);
+ const int x_max = min(int(x + support0 + p.pixel_offset), int(p.ne00));
+ const int y_min = max(int(y - support1 + p.pixel_offset), 0);
+ const int y_max = min(int(y + support1 + p.pixel_offset), int(p.ne01));
+
+ // bilinear filter with antialiasing
+ float val = 0.0f;
+ float total_weight = 0.0f;
+
+ for (int sy = y_min; sy < y_max; sy++) {
+ const float weight_y = triangle_filter((sy - y + p.pixel_offset) * invscale1);
+
+ for (int sx = x_min; sx < x_max; sx++) {
+ const float weight_x = triangle_filter((sx - x + p.pixel_offset) * invscale0);
+ const float weight = weight_x * weight_y;
+
+ if (weight <= 0.0f) {
+ continue;
+ }
+
+ const float pixel = data_a[p.a_offset + i03 * p.nb03 + i02 * p.nb02 + sy * p.nb01 + sx * p.nb00];
+ val += pixel * weight;
+ total_weight += weight;
+ }
+ }
+
+ if (total_weight > 0.0f) {
+ val /= total_weight;
+ }
+
+ return val;
+}
+
// Bicubic interpolation with alpha = -0.75
// https://en.wikipedia.org/wiki/Bicubic_interpolation#Bicubic_convolution_algorithm
const vec4 bcoeffs1 = vec4( 1.25, -2.25, 0.0, 1.0);
case BICUBIC:
result = interpolate_bicubic(i10, i11, i12, i13);
break;
+ case BILINEAR_ANTIALIAS:
+ result = interpolate_bilinear_antialias(i10, i11, i12, i13);
+ break;
}
data_d[p.d_offset + idx] = D_TYPE(result);
}
static std::string var_to_str(ggml_scale_mode mode) {
- switch (mode) {
- case GGML_SCALE_MODE_NEAREST: return "nearest";
- case GGML_SCALE_MODE_BILINEAR: return "bilinear";
- case GGML_SCALE_MODE_BICUBIC: return "bicubic";
- default: return std::to_string(mode);
+ std::string str;
+ switch (mode & 0xFF) {
+ case GGML_SCALE_MODE_NEAREST: str = "nearest"; break;
+ case GGML_SCALE_MODE_BILINEAR: str = "bilinear"; break;
+ case GGML_SCALE_MODE_BICUBIC: str = "bicubic"; break;
+ default: str = std::to_string(mode); break;
}
+ if (mode & GGML_SCALE_FLAG_ALIGN_CORNERS) {
+ str += "|align_corners";
+ }
+ if (mode & GGML_SCALE_FLAG_ANTIALIAS) {
+ str += "|antialias";
+ }
+ return str;
}
#define VAR_TO_STR(x) (#x "=" + var_to_str(x))
const ggml_type type;
const std::array<int64_t, 4> ne;
const std::array<int64_t, 4> ne_tgt;
- const uint32_t mode = GGML_SCALE_MODE_NEAREST;
+ const ggml_scale_mode mode = GGML_SCALE_MODE_NEAREST;
std::string vars() override {
- ggml_scale_mode mode = (ggml_scale_mode)(this->mode & 0xFF);
- std::string flags = (this->mode & GGML_SCALE_FLAG_ALIGN_CORNERS) ? "align_corners" : "none";
- return VARS_TO_STR5(type, ne, ne_tgt, mode, flags);
+ return VARS_TO_STR4(type, ne, ne_tgt, mode);
}
test_interpolate(ggml_type type = GGML_TYPE_F32,
std::array<int64_t, 4> ne = {2, 5, 7, 11},
std::array<int64_t, 4> ne_tgt = {5, 7, 11, 13},
- uint32_t mode = GGML_SCALE_MODE_NEAREST)
+ ggml_scale_mode mode = GGML_SCALE_MODE_NEAREST)
: type(type), ne(ne), ne_tgt(ne_tgt), mode(mode) {}
ggml_tensor * build_graph(ggml_context * ctx) override {
test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {5, 7, 11, 13}, {2, 5, 7, 11}, mode));
}
for (ggml_scale_mode mode : {GGML_SCALE_MODE_BILINEAR, GGML_SCALE_MODE_BICUBIC}) {
- test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {2, 5, 7, 11}, {5, 7, 11, 13}, mode | GGML_SCALE_FLAG_ALIGN_CORNERS));
- test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {1, 4, 3, 2}, {2, 8, 3, 2}, mode | GGML_SCALE_FLAG_ALIGN_CORNERS));
- test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {4, 1, 3, 2}, {1, 1, 3, 2}, mode | GGML_SCALE_FLAG_ALIGN_CORNERS));
+ test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {2, 5, 7, 11}, {5, 7, 11, 13}, (ggml_scale_mode)(mode | GGML_SCALE_FLAG_ALIGN_CORNERS)));
+ test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {1, 4, 3, 2}, {2, 8, 3, 2}, (ggml_scale_mode)(mode | GGML_SCALE_FLAG_ALIGN_CORNERS)));
+ test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {4, 1, 3, 2}, {1, 1, 3, 2}, (ggml_scale_mode)(mode | GGML_SCALE_FLAG_ALIGN_CORNERS)));
}
test_cases.emplace_back(new test_sum());