float p0,
float p1);
- // nearest interpolate
+ enum ggml_scale_mode {
+ GGML_SCALE_MODE_NEAREST = 0,
+ GGML_SCALE_MODE_BILINEAR = 1,
+ };
+
+ // interpolate
// multiplies ne0 and ne1 by scale factor
- // used in stable-diffusion
GGML_API struct ggml_tensor * ggml_upscale(
struct ggml_context * ctx,
struct ggml_tensor * a,
- int scale_factor);
+ int scale_factor,
+ enum ggml_scale_mode mode);
- // nearest interpolate
- // nearest interpolate to specified dimensions
- // used in tortoise.cpp
+ // interpolate
+ // interpolate to the specified dimensions
GGML_API struct ggml_tensor * ggml_upscale_ext(
struct ggml_context * ctx,
struct ggml_tensor * a,
int ne0,
int ne1,
int ne2,
- int ne3);
+ int ne3,
+ enum ggml_scale_mode mode);
// pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
GGML_API struct ggml_tensor * ggml_pad(
if (op->src[0]->ne[2] * op->ne[3] != op->src[0]->ne[3] * op->ne[2]) {
return false;
}
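+ // this backend only implements nearest-neighbour upscaling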
+ if (op->op_params[0] != GGML_SCALE_MODE_NEAREST) {
+ return false;
+ }
return true;
}
case GGML_OP_POOL_2D: {
const float sf2 = (float)ne2/src0->ne[2];
const float sf3 = (float)ne3/src0->ne[3];
- // TODO: optimize
-
- for (int64_t i3 = 0; i3 < ne3; i3++) {
- const int64_t i03 = i3 / sf3;
- for (int64_t i2 = ith; i2 < ne2; i2 += nth) {
- const int64_t i02 = i2 / sf2;
- for (int64_t i1 = 0; i1 < ne1; i1++) {
- const int64_t i01 = i1 / sf1;
- for (int64_t i0 = 0; i0 < ne0; i0++) {
- const int64_t i00 = i0 / sf0;
-
- const float * x = (float *)((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03);
- float * y = (float *)((char *) dst->data + i0*nb0 + i1*nb1 + i2*nb2 + i3*nb3);
-
- *y = *x;
+ const ggml_scale_mode mode = (ggml_scale_mode) ggml_get_op_params_i32(dst, 0);
+
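+ // nearest: copy the single closest source element; bilinear: blend the four closest source elements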
+ if (mode == GGML_SCALE_MODE_NEAREST) {
+ for (int64_t i3 = 0; i3 < ne3; i3++) {
+ const int64_t i03 = i3 / sf3;
+ for (int64_t i2 = ith; i2 < ne2; i2 += nth) {
+ const int64_t i02 = i2 / sf2;
+ for (int64_t i1 = 0; i1 < ne1; i1++) {
+ const int64_t i01 = i1 / sf1;
+ for (int64_t i0 = 0; i0 < ne0; i0++) {
+ const int64_t i00 = i0 / sf0;
+
+ const float * x = (float *)((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03);
+ float * y = (float *)((char *) dst->data + i0*nb0 + i1*nb1 + i2*nb2 + i3*nb3);
+
+ *y = *x;
+ }
+ }
+ }
+ }
+ } else if (mode == GGML_SCALE_MODE_BILINEAR) {
+ // setting a pixel offset of 0 would replicate the behavior of pytorch interpolate with align_corners=True
+ const float pixel_offset = 0.5f;
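+ // with 0.5 the source is sampled at pixel centers, matching pytorch interpolate with align_corners=False (the default)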
+
+ for (int64_t i3 = 0; i3 < ne3; i3++) {
+ const int64_t i03 = i3 / sf3;
+ for (int64_t i2 = ith; i2 < ne2; i2 += nth) {
+ const int64_t i02 = i2 / sf2;
+ for (int64_t i1 = 0; i1 < ne1; i1++) {
+ const float y = ((float)i1 + pixel_offset) / sf1 - pixel_offset;
+ int64_t y0 = (int64_t)floorf(y);
+ int64_t y1 = y0 + 1;
+
+ y0 = std::max(int64_t(0), std::min(y0, ne01 - 1));
+ y1 = std::max(int64_t(0), std::min(y1, ne01 - 1));
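+ // clamp to the valid source range so border pixels are replicated at the edges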
+
+ float dy = y - (float)y0;
+ dy = std::max(0.0f, std::min(dy, 1.0f));
+
+ for (int64_t i0 = 0; i0 < ne0; i0++) {
+ const float x = ((float)i0 + pixel_offset) / sf0 - pixel_offset;
+ int64_t x0 = (int64_t)floorf(x);
+ int64_t x1 = x0 + 1;
+
+ x0 = std::max(int64_t(0), std::min(x0, ne00 - 1));
+ x1 = std::max(int64_t(0), std::min(x1, ne00 - 1));
+
+ float dx = x - (float)x0;
+ dx = std::max(0.0f, std::min(dx, 1.0f));
+
+ // fetch the four surrounding pixel values and interpolate
+ const float a = *(const float *)((const char *)src0->data + x0*nb00 + y0*nb01 + i02*nb02 + i03*nb03);
+ const float b = *(const float *)((const char *)src0->data + x1*nb00 + y0*nb01 + i02*nb02 + i03*nb03);
+ const float c = *(const float *)((const char *)src0->data + x0*nb00 + y1*nb01 + i02*nb02 + i03*nb03);
+ const float d = *(const float *)((const char *)src0->data + x1*nb00 + y1*nb01 + i02*nb02 + i03*nb03);
+
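+ // the four weights (1 - dx)*(1 - dy), dx*(1 - dy), (1 - dx)*dy and dx*dy sum to 1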
+ const float val = a*(1 - dx)*(1 - dy) + b*dx*(1 - dy) + c*(1 - dx)*dy + d*dx*dy;
+
+ float * y_dst = (float *)((char *)dst->data + i0*nb0 + i1*nb1 + i2*nb2 + i3*nb3);
+ *y_dst = val;
+ }
}
}
}
+ } else {
+ GGML_ABORT("unsupported upscale mode");
}
}
case GGML_OP_GROUP_NORM:
return ggml_is_contiguous(op->src[0]);
case GGML_OP_UPSCALE:
+ return op->src[0]->type == GGML_TYPE_F32 && op->op_params[0] == GGML_SCALE_MODE_NEAREST;
case GGML_OP_PAD:
case GGML_OP_ARANGE:
case GGML_OP_TIMESTEP_EMBEDDING:
return op->src[0]->type == GGML_TYPE_F16;
case GGML_OP_POOL_1D:
return false;
- case GGML_OP_POOL_2D:
case GGML_OP_UPSCALE:
+ return op->src[0]->type == GGML_TYPE_F32 && op->op_params[0] == GGML_SCALE_MODE_NEAREST;
+ case GGML_OP_POOL_2D:
case GGML_OP_PAD:
case GGML_OP_PAD_REFLECT_1D:
case GGML_OP_TIMESTEP_EMBEDDING:
case GGML_OP_IM2COL:
// TODO: add support for the new F32 operations
return op->src[0]->type == GGML_TYPE_F16;
+ case GGML_OP_UPSCALE:
+ return op->src[0]->type == GGML_TYPE_F32 && op->op_params[0] == GGML_SCALE_MODE_NEAREST;
case GGML_OP_POOL_2D:
case GGML_OP_SUM:
case GGML_OP_SUM_ROWS:
case GGML_OP_ARGSORT:
case GGML_OP_ACC:
- case GGML_OP_UPSCALE:
case GGML_OP_PAD:
case GGML_OP_LEAKY_RELU:
case GGML_OP_TIMESTEP_EMBEDDING:
}
return nullptr;
case GGML_OP_UPSCALE:
- if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) {
+ if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32 && dst->op_params[0] == GGML_SCALE_MODE_NEAREST) {
return ctx->device->pipeline_upscale_f32;
}
return nullptr;
case GGML_OP_COS:
case GGML_OP_CLAMP:
return op->src[0]->type == GGML_TYPE_F32;
+ case GGML_OP_UPSCALE:
+ return op->op_params[0] == GGML_SCALE_MODE_NEAREST;
case GGML_OP_ACC:
case GGML_OP_CONCAT:
- case GGML_OP_UPSCALE:
case GGML_OP_SCALE:
case GGML_OP_PAD:
case GGML_OP_DIAG_MASK_INF:
} else if (tensor->op == GGML_OP_CONCAT) {
tensor_clone = ggml_concat(ggml_ctx, src_clone[0], src_clone[1], *(int *)tensor->op_params);
} else if (tensor->op == GGML_OP_UPSCALE) {
- tensor_clone = ggml_upscale_ext(ggml_ctx, src_clone[0], tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3]);
+ tensor_clone = ggml_upscale_ext(ggml_ctx, src_clone[0], tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3], (ggml_scale_mode) tensor->op_params[0]);
} else if (tensor->op == GGML_OP_SCALE) {
const float * params = (const float *)tensor->op_params;
tensor_clone = ggml_scale(ggml_ctx, src_clone[0], params[0]);
int ne0,
int ne1,
int ne2,
- int ne3) {
+ int ne3,
+ enum ggml_scale_mode mode) {
GGML_ASSERT(a->ne[0] <= ne0);
GGML_ASSERT(a->ne[1] <= ne1);
GGML_ASSERT(a->ne[2] <= ne2);
struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
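+ // store the interpolation mode in op_params[0] so backends can read it when executing or filtering the op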
+ ggml_set_op_params_i32(result, 0, mode);
+
result->op = GGML_OP_UPSCALE;
result->src[0] = a;
struct ggml_tensor * ggml_upscale(
struct ggml_context * ctx,
struct ggml_tensor * a,
- int scale_factor) {
- return ggml_upscale_impl(ctx, a, a->ne[0] * scale_factor, a->ne[1] * scale_factor, a->ne[2], a->ne[3]);
+ int scale_factor,
+ enum ggml_scale_mode mode) {
+ return ggml_upscale_impl(ctx, a, a->ne[0] * scale_factor, a->ne[1] * scale_factor, a->ne[2], a->ne[3], mode);
}
struct ggml_tensor * ggml_upscale_ext(
int ne0,
int ne1,
int ne2,
- int ne3) {
- return ggml_upscale_impl(ctx, a, ne0, ne1, ne2, ne3);
+ int ne3,
+ enum ggml_scale_mode mode) {
+ return ggml_upscale_impl(ctx, a, ne0, ne1, ne2, ne3, mode);
}
// ggml_pad
}
}
+static std::string var_to_str(ggml_scale_mode mode) {
+ switch (mode) {
+ case GGML_SCALE_MODE_NEAREST: return "nearest";
+ case GGML_SCALE_MODE_BILINEAR: return "bilinear";
+ default: return std::to_string(mode);
+ }
+}
+
#define VAR_TO_STR(x) (#x "=" + var_to_str(x))
#define VARS_TO_STR1(a) VAR_TO_STR(a)
const std::array<int64_t, 4> ne;
const int32_t scale_factor;
const bool transpose;
+ const ggml_scale_mode mode;
std::string vars() override {
- return VARS_TO_STR4(type, ne, scale_factor, transpose);
+ return VARS_TO_STR5(type, ne, scale_factor, mode, transpose);
}
test_upscale(ggml_type type = GGML_TYPE_F32,
std::array<int64_t, 4> ne = {512, 512, 3, 1},
- int32_t scale_factor = 2, bool transpose = false)
- : type(type), ne(ne), scale_factor(scale_factor), transpose(transpose) {}
+ int32_t scale_factor = 2, ggml_scale_mode mode = GGML_SCALE_MODE_NEAREST, bool transpose = false)
+ : type(type), ne(ne), scale_factor(scale_factor), mode(mode), transpose(transpose) {}
ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
ggml_set_name(a, "a_transposed");
}
- ggml_tensor * out = ggml_upscale(ctx, a, scale_factor);
+ ggml_tensor * out = ggml_upscale(ctx, a, scale_factor, mode);
ggml_set_name(out, "out");
return out;
const ggml_type type;
const std::array<int64_t, 4> ne;
const std::array<int64_t, 4> ne_tgt;
+ const ggml_scale_mode mode = GGML_SCALE_MODE_NEAREST;
std::string vars() override {
- return VARS_TO_STR3(type, ne, ne_tgt);
+ return VARS_TO_STR4(type, ne, ne_tgt, mode);
}
test_upscale_ext(ggml_type type = GGML_TYPE_F32,
std::array<int64_t, 4> ne = {2, 5, 7, 11},
- std::array<int64_t, 4> ne_tgt = {5, 7, 11, 13})
- : type(type), ne(ne), ne_tgt(ne_tgt) {}
+ std::array<int64_t, 4> ne_tgt = {5, 7, 11, 13},
+ ggml_scale_mode mode = GGML_SCALE_MODE_NEAREST)
+ : type(type), ne(ne), ne_tgt(ne_tgt), mode(mode) {}
ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
ggml_set_name(a, "a");
- ggml_tensor * out = ggml_upscale_ext(ctx, a, ne_tgt[0], ne_tgt[1],ne_tgt[2], ne_tgt[3]);
+ ggml_tensor * out = ggml_upscale_ext(ctx, a, ne_tgt[0], ne_tgt[1], ne_tgt[2], ne_tgt[3], mode);
ggml_set_name(out, "out");
return out;
test_cases.emplace_back(new test_argsort(GGML_TYPE_F32, {60, 10, 10, 10}, order)); // qwen
}
+ for (ggml_scale_mode mode : {GGML_SCALE_MODE_NEAREST, GGML_SCALE_MODE_BILINEAR}) {
+ test_cases.emplace_back(new test_upscale(GGML_TYPE_F32, {512, 512, 3, 2}, 2, mode));
+ test_cases.emplace_back(new test_upscale(GGML_TYPE_F32, {512, 512, 3, 2}, 2, mode, true));
+ test_cases.emplace_back(new test_upscale_ext(GGML_TYPE_F32, {2, 5, 7, 11}, {5, 7, 11, 13}, mode));
+ }
+
test_cases.emplace_back(new test_sum());
test_cases.emplace_back(new test_sum_rows());
test_cases.emplace_back(new test_mean());
- test_cases.emplace_back(new test_upscale());
- test_cases.emplace_back(new test_upscale(GGML_TYPE_F32, { 512, 512, 3, 1 }, 2, true));
- test_cases.emplace_back(new test_upscale_ext());
test_cases.emplace_back(new test_group_norm(GGML_TYPE_F32, {64, 64, 320, 1}));
test_cases.emplace_back(new test_group_norm(GGML_TYPE_F32, {9, 9, 1280, 1}));
test_cases.emplace_back(new test_acc());