GGML_OP_UPSCALE, // nearest interpolate
GGML_OP_PAD,
GGML_OP_PAD_REFLECT_1D,
+ GGML_OP_ROLL,
GGML_OP_ARANGE,
GGML_OP_TIMESTEP_EMBEDDING,
GGML_OP_ARGSORT,
int p0,
int p1);
+ // Move tensor elements by an offset given for each dimension. Elements that
+ // are shifted beyond the last position are wrapped around to the beginning.
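+ //
+ // Usage sketch (hypothetical names, not part of this change): given a context `ctx`
+ // and an F32 tensor `x`,
+ //   struct ggml_tensor * y = ggml_roll(ctx, x, 1, 0, 0, 0);
+ // rolls dim 0 forward by one, so the last element of x along dim 0 wraps to index 0.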
+ GGML_API struct ggml_tensor * ggml_roll(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ int shift0,
+ int shift1,
+ int shift2,
+ int shift3);
+
// Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
// timesteps: [N,]
// return: [N, dim]
{
ggml_compute_forward_pad_reflect_1d(params, tensor);
} break;
+ case GGML_OP_ROLL:
+ {
+ ggml_compute_forward_roll(params, tensor);
+ } break;
case GGML_OP_ARANGE:
{
ggml_compute_forward_arange(params, tensor);
case GGML_OP_UPSCALE:
case GGML_OP_PAD:
case GGML_OP_PAD_REFLECT_1D:
+ case GGML_OP_ROLL:
case GGML_OP_ARANGE:
case GGML_OP_TIMESTEP_EMBEDDING:
case GGML_OP_ARGSORT:
}
}
+// ggml_compute_forward_roll
+
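+// Wrap an index back into [0, ne). Valid for i in [-ne, 2*ne), which holds here
+// because ggml_roll asserts |shift| < ne for every dimension.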
+static int64_t ggml_wrap_index(int64_t i, int64_t ne) {
+ if (i < 0) {
+ return i + ne;
+ } else if (i >= ne) {
+ return i - ne;
+ }
+ return i;
+}
+
+static void ggml_compute_forward_roll_f32(
+ const ggml_compute_params * params,
+ ggml_tensor * dst) {
+
+ const ggml_tensor * src0 = dst->src[0];
+ const float * src_data = (const float *) src0->data;
+ float * dst_data = (float *) dst->data;
+
+ GGML_TENSOR_UNARY_OP_LOCALS
+
+ const int s0 = ggml_get_op_params_i32(dst, 0);
+ const int s1 = ggml_get_op_params_i32(dst, 1);
+ const int s2 = ggml_get_op_params_i32(dst, 2);
+ const int s3 = ggml_get_op_params_i32(dst, 3);
+
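+    // parallelize over dst rows: the ne1*ne2*ne3 rows are split into contiguous
+    // per-thread blocks (end is clamped so the last threads never run past total)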
+ const int64_t total = ne1 * ne2 * ne3;
+ const int64_t per_thread = (total + params->nth) / params->nth;
+ const int64_t start = params->ith * per_thread;
+ const int64_t end = std::min(start + per_thread, total);
+
+ for (int64_t i = start; i < end; ++i) {
+ const int64_t i1 = i % ne1;
+ const int64_t i2 = (i / ne1) % ne2;
+ const int64_t i3 = i / (ne2 * ne1);
+ float * dst_row = dst_data + (i3*nb3 + i2*nb2 + i1*nb1) / sizeof(float);
+
+ const int64_t i01 = ggml_wrap_index(i1 - s1, ne01);
+ const int64_t i02 = ggml_wrap_index(i2 - s2, ne02);
+ const int64_t i03 = ggml_wrap_index(i3 - s3, ne03);
+ const float * src_row = src_data + (i03*nb03 + i02*nb02 + i01*nb01) / sizeof(float);
+
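+        // roll dim 0 with two contiguous copies:
+        //   src_row[s .. ne00) -> dst_row[0 .. n), then src_row[0 .. s) -> dst_row[n .. ne00)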
+ const int64_t s = ggml_wrap_index(-s0, ne00);
+ const int64_t n = ne00 - s;
+ ggml_vec_cpy_f32(n, dst_row, src_row + s);
+ ggml_vec_cpy_f32(s, dst_row + n, src_row);
+ }
+}
+
+void ggml_compute_forward_roll(
+ const ggml_compute_params * params,
+ ggml_tensor * dst) {
+
+ const ggml_tensor * src0 = dst->src[0];
+
+ switch (src0->type) {
+ case GGML_TYPE_F32:
+ {
+ ggml_compute_forward_roll_f32(params, dst);
+ } break;
+ default:
+ {
+ GGML_ABORT("fatal error");
+ }
+ }
+}
+
// ggml_compute_forward_arange
static void ggml_compute_forward_arange_f32(
void ggml_compute_forward_upscale(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_pad(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_pad_reflect_1d(const struct ggml_compute_params * params, struct ggml_tensor * dst);
+void ggml_compute_forward_roll(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_arange(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_timestep_embedding(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_argsort(const struct ggml_compute_params * params, struct ggml_tensor * dst);
"UPSCALE",
"PAD",
"PAD_REFLECT_1D",
+ "ROLL",
"ARANGE",
"TIMESTEP_EMBEDDING",
"ARGSORT",
"OPT_STEP_ADAMW",
};
-static_assert(GGML_OP_COUNT == 82, "GGML_OP_COUNT != 82");
+static_assert(GGML_OP_COUNT == 83, "GGML_OP_COUNT != 83");
static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
"none",
"upscale(x)",
"pad(x)",
"pad_reflect_1d(x)",
+ "roll(x)",
"arange(start, stop, step)",
"timestep_embedding(timesteps, dim, max_period)",
"argsort(x)",
"adamw(x)",
};
-static_assert(GGML_OP_COUNT == 82, "GGML_OP_COUNT != 82");
+static_assert(GGML_OP_COUNT == 83, "GGML_OP_COUNT != 83");
static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
return result;
}
+// ggml_roll
+
+struct ggml_tensor * ggml_roll(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ int shift0,
+ int shift1,
+ int shift2,
+ int shift3) {
+ GGML_ASSERT(a->nb[0] == ggml_type_size(a->type));
+ GGML_ASSERT(abs(shift0) < a->ne[0]);
+ GGML_ASSERT(abs(shift1) < a->ne[1]);
+ GGML_ASSERT(abs(shift2) < a->ne[2]);
+ GGML_ASSERT(abs(shift3) < a->ne[3]);
+
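+    // the result has the same shape and type as a; the shifts are stored as
+    // int32 op params and applied by the forward pass (ggml_compute_forward_roll)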
+ struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
+
+ ggml_set_op_params_i32(result, 0, shift0);
+ ggml_set_op_params_i32(result, 1, shift1);
+ ggml_set_op_params_i32(result, 2, shift2);
+ ggml_set_op_params_i32(result, 3, shift3);
+
+ result->op = GGML_OP_ROLL;
+ result->src[0] = a;
+
+ return result;
+}
+
// ggml_arange
struct ggml_tensor * ggml_arange(