GGML_OP_COS,
GGML_OP_SUM,
GGML_OP_SUM_ROWS,
+ GGML_OP_CUMSUM,
GGML_OP_MEAN,
GGML_OP_ARGMAX,
GGML_OP_COUNT_EQUAL,
GGML_OP_TIMESTEP_EMBEDDING,
GGML_OP_ARGSORT,
GGML_OP_LEAKY_RELU,
+ GGML_OP_TRI,
+ GGML_OP_FILL,
GGML_OP_FLASH_ATTN_EXT,
GGML_OP_FLASH_ATTN_BACK,
GGML_OP_RWKV_WKV6,
GGML_OP_GATED_LINEAR_ATTN,
GGML_OP_RWKV_WKV7,
+ GGML_OP_SOLVE_TRI,
GGML_OP_UNARY,
GGML_UNARY_OP_HARDSWISH,
GGML_UNARY_OP_HARDSIGMOID,
GGML_UNARY_OP_EXP,
+ GGML_UNARY_OP_EXPM1,
+ GGML_UNARY_OP_SOFTPLUS,
GGML_UNARY_OP_GELU_ERF,
GGML_UNARY_OP_XIELU,
GGML_UNARY_OP_FLOOR,
GGML_TENSOR_FLAG_LOSS = 8, // ...defines loss for numerical optimization (multiple loss tensors add up)
};
+ enum ggml_tri_type {
+ GGML_TRI_TYPE_UPPER_DIAG = 0,
+ GGML_TRI_TYPE_UPPER = 1,
+ GGML_TRI_TYPE_LOWER_DIAG = 2,
+ GGML_TRI_TYPE_LOWER = 3
+ };
+
struct ggml_init_params {
// memory pool
size_t mem_size; // bytes
struct ggml_context * ctx,
struct ggml_tensor * a);
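+
+ // exp(x) - 1, element-wise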
+ GGML_API struct ggml_tensor * ggml_expm1(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a);
+
+ GGML_API struct ggml_tensor * ggml_expm1_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a);
+
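+ // log(1 + exp(x)), element-wise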
+ GGML_API struct ggml_tensor * ggml_softplus(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a);
+
+ GGML_API struct ggml_tensor * ggml_softplus_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a);
+
GGML_API struct ggml_tensor * ggml_sin(
struct ggml_context * ctx,
struct ggml_tensor * a);
struct ggml_context * ctx,
struct ggml_tensor * a);
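+
+ // cumulative sum along rows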
+ GGML_API struct ggml_tensor * ggml_cumsum(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a);
+
// mean along rows
GGML_API struct ggml_tensor * ggml_mean(
struct ggml_context * ctx,
int shift2,
int shift3);
+ // Convert a matrix into a triangular one (upper, strict upper, lower or strict lower) by writing
+ // zeroes everywhere outside the selected triangle
+ GGML_API struct ggml_tensor * ggml_tri(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ enum ggml_tri_type type);
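+
+ // example (illustrative): keep the lower triangle of a square tensor, diagonal included:
+ //   struct ggml_tensor * lt = ggml_tri(ctx, a, GGML_TRI_TYPE_LOWER_DIAG); // zeroes everything above the diagonal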
+
+ // Fill tensor a with constant c
+ GGML_API struct ggml_tensor * ggml_fill(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ float c);
+
+ GGML_API struct ggml_tensor * ggml_fill_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ float c);
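+
+ // example (illustrative): ggml_fill_inplace(ctx, t, 0.0f) sets every element of t to zero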
// Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
// timesteps: [N,]
struct ggml_tensor * b,
struct ggml_tensor * state);
+ /* Solves a system of linear equations of the form A X = B, where A is a triangular matrix
+ * with no zeroes on the diagonal (i.e. invertible).
+ * B can have any number of columns, but must have the same number of rows as A.
+ * If A is [n, n] and B is [n, m], then the result will be [n, m] as well.
+ * Has O(n^3) complexity (unlike most other matrix ops), so use it sparingly for
+ * n > 100 and pre-chunk if necessary.
+ *
+ * If left = false, solves X A = B instead.
+ * If lower = false, assumes A is upper triangular instead.
+ * If uni = true, assumes the diagonal of A to be all ones (overriding the actual values).
+ *
+ * TODO: currently only the left, lower, non-unitriangular variant is implemented
+ */
+ GGML_API struct ggml_tensor * ggml_solve_tri(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b,
+ bool left,
+ bool lower,
+ bool uni);
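+
+ // usage sketch (illustrative), the only variant currently implemented:
+ //   struct ggml_tensor * x = ggml_solve_tri(ctx, a, b, /*left=*/true, /*lower=*/true, /*uni=*/false);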
+
// custom operators
typedef void (*ggml_custom1_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, int ith, int nth, void * userdata);
{
ggml_compute_forward_sum_rows(params, tensor);
} break;
+ case GGML_OP_CUMSUM:
+ {
+ ggml_compute_forward_cumsum(params, tensor);
+ } break;
case GGML_OP_MEAN:
{
ggml_compute_forward_mean(params, tensor);
{
ggml_compute_forward_leaky_relu(params, tensor);
} break;
+ case GGML_OP_TRI:
+ {
+ ggml_compute_forward_tri(params, tensor);
+ } break;
+ case GGML_OP_FILL:
+ {
+ ggml_compute_forward_fill(params, tensor);
+ } break;
case GGML_OP_FLASH_ATTN_EXT:
{
ggml_compute_forward_flash_attn_ext(params, tensor);
{
ggml_compute_forward_rwkv_wkv7(params, tensor);
} break;
+ case GGML_OP_SOLVE_TRI:
+ {
+ ggml_compute_forward_solve_tri(params, tensor);
+ } break;
case GGML_OP_MAP_CUSTOM1:
{
ggml_compute_forward_map_custom1(params, tensor);
case GGML_OP_ADD_ID:
case GGML_OP_ADD1:
case GGML_OP_ACC:
+ case GGML_OP_CUMSUM:
+ case GGML_OP_TRI:
+ case GGML_OP_FILL:
{
n_tasks = n_threads;
} break;
n_tasks = 1;
} break;
case GGML_OP_COUNT_EQUAL:
+ case GGML_OP_SOLVE_TRI:
{
n_tasks = n_threads;
} break;
case GGML_UNARY_OP_HARDSWISH:
case GGML_UNARY_OP_HARDSIGMOID:
case GGML_UNARY_OP_EXP:
+ case GGML_UNARY_OP_SOFTPLUS:
+ case GGML_UNARY_OP_EXPM1:
case GGML_UNARY_OP_FLOOR:
case GGML_UNARY_OP_CEIL:
case GGML_UNARY_OP_ROUND:
#include <cfloat>
#include <algorithm>
+#include <cmath>
#include <functional>
// ggml_compute_forward_dup
}
}
+// ggml_compute_forward_cumsum
+
+static void ggml_compute_forward_cumsum_f32(
+ const ggml_compute_params * params,
+ ggml_tensor * dst) {
+
+ const ggml_tensor * src0 = dst->src[0];
+
+ GGML_ASSERT(src0->nb[0] == sizeof(float));
+ GGML_ASSERT(dst->nb[0] == sizeof(float));
+
+ GGML_TENSOR_UNARY_OP_LOCALS
+
+ GGML_ASSERT(ne0 == ne00);
+ GGML_ASSERT(ne1 == ne01);
+ GGML_ASSERT(ne2 == ne02);
+ GGML_ASSERT(ne3 == ne03);
+
+ const auto [ir0, ir1] = get_thread_range(params, src0);
+
+ for (int64_t ir = ir0; ir < ir1; ++ir) {
+ const int64_t i03 = ir/(ne02*ne01);
+ const int64_t i02 = (ir - i03*ne02*ne01)/ne01;
+ const int64_t i01 = (ir - i03*ne02*ne01 - i02*ne01);
+
+ float * src_row = (float *) ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03);
+ float * dst_row = (float *) ((char *) dst->data + i01*nb1 + i02*nb2 + i03*nb3);
+
+ ggml_vec_cumsum_f32(ne00, dst_row, src_row);
+ }
+}
+
+void ggml_compute_forward_cumsum(
+ const ggml_compute_params * params,
+ ggml_tensor * dst) {
+
+ const ggml_tensor * src0 = dst->src[0];
+
+ switch (src0->type) {
+ case GGML_TYPE_F32:
+ {
+ ggml_compute_forward_cumsum_f32(params, dst);
+ } break;
+ default:
+ {
+ GGML_ABORT("fatal error");
+ }
+ }
+}
+
// ggml_compute_forward_sum_rows
static void ggml_compute_forward_sum_rows_f32(
}
}
+// ggml_compute_forward_fill
+
+static void ggml_compute_forward_fill_f32(const ggml_compute_params * params, ggml_tensor * dst) {
+ const float c = ggml_get_op_params_f32(dst, 0);
+
+ GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
+ GGML_TENSOR_LOCALS(size_t, nb, dst, nb);
+
+ const auto [ir0, ir1] = get_thread_range(params, dst);
+
+ for (int64_t ir = ir0; ir < ir1; ++ir) {
+ const int64_t i03 = ir/(ne2*ne1);
+ const int64_t i02 = (ir - i03*ne2*ne1)/ne1;
+ const int64_t i01 = (ir - i03*ne2*ne1 - i02*ne1);
+
+ float * dst_ptr = (float *) ((char *) dst->data + i03*nb3 + i02*nb2 + i01*nb1);
+
+ ggml_vec_set_f32(ne0, dst_ptr, c);
+ }
+}
+
+void ggml_compute_forward_fill(const ggml_compute_params * params, ggml_tensor * dst) {
+ ggml_compute_forward_fill_f32(params, dst);
+}
+
+// ggml_compute_forward_tri
+
+static void ggml_compute_forward_tri_f32(const ggml_compute_params * params, ggml_tensor * dst) {
+ const ggml_tensor * src0 = dst->src[0];
+
+ const ggml_tri_type ttype = (ggml_tri_type) ggml_get_op_params_i32(dst, 0);
+
+ GGML_ASSERT(ggml_is_contiguous(src0));
+
+ GGML_TENSOR_UNARY_OP_LOCALS
+
+ const auto [ir0, ir1] = get_thread_range(params, src0);
+
+ bool (*bipred)(int, int);
+
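+ // select the predicate: the element at column i, row r is kept if bipred(i, r) is true, zeroed otherwise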
+ switch (ttype) {
+ case GGML_TRI_TYPE_LOWER: bipred = [](int i, int r) { return i < r; }; break;
+ case GGML_TRI_TYPE_LOWER_DIAG: bipred = [](int i, int r) { return i <= r; }; break;
+ case GGML_TRI_TYPE_UPPER: bipred = [](int i, int r) { return i > r; }; break;
+ case GGML_TRI_TYPE_UPPER_DIAG: bipred = [](int i, int r) { return i >= r; }; break;
+ default: GGML_ABORT("invalid tri type");
+ }
+
+ for (int64_t ir = ir0; ir < ir1; ++ir) {
+ const int64_t i03 = ir/(ne02*ne01);
+ const int64_t i02 = (ir - i03*ne02*ne01)/ne01;
+ const int64_t i01 = (ir - i03*ne02*ne01 - i02*ne01);
+
+ const float * src_ptr = (const float *) ((const char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01);
+ float * dst_ptr = ( float *) (( char *) dst->data + i03*nb3 + i02*nb2 + i01*nb1);
+
+ for (int i0 = 0; i0 < ne0; ++i0) {
+ dst_ptr[i0] = bipred(i0, i01) ? src_ptr[i0] : 0.0f;
+ }
+ }
+}
+
+void ggml_compute_forward_tri(const ggml_compute_params * params, ggml_tensor * dst) {
+ const ggml_tensor * src0 = dst->src[0];
+
+ switch (src0->type) {
+ case GGML_TYPE_F32:
+ {
+ ggml_compute_forward_tri_f32(params, dst);
+ } break;
+ default:
+ {
+ GGML_ABORT("fatal error");
+ }
+ }
+}
+
// ggml_compute_forward_gelu_erf
static void ggml_compute_forward_gelu_erf_f32(
// n_head
for (int h = ih0; h < ih1; ++h) {
// ref: https://github.com/state-spaces/mamba/blob/62db608da60f6fc790b8ed9f4b3225e95ca15fde/mamba_ssm/ops/triton/softplus.py#L16
- const float dt_soft_plus = ggml_softplus(dt[h]);
+ const float dt_soft_plus = ggml_compute_softplus_f32(dt[h]);
const float dA = expf(dt_soft_plus * A[h]);
const int g = h / (nh / ng); // repeat_interleave
// n_head
for (int h = ih0; h < ih1; ++h) {
// ref: https://github.com/state-spaces/mamba/blob/62db608da60f6fc790b8ed9f4b3225e95ca15fde/mamba_ssm/ops/triton/softplus.py#L16
- const float dt_soft_plus = ggml_softplus(dt[h]);
+ const float dt_soft_plus = ggml_compute_softplus_f32(dt[h]);
const int g = h / (nh / ng); // repeat_interleave
// dim
{
ggml_compute_forward_xielu(params, dst);
} break;
+ case GGML_UNARY_OP_EXPM1:
+ {
+ ggml_compute_forward_expm1(params, dst);
+ } break;
+ case GGML_UNARY_OP_SOFTPLUS:
+ {
+ ggml_compute_forward_softplus(params, dst);
+ } break;
default:
{
GGML_ABORT("fatal error");
}
}
+static void ggml_compute_forward_solve_tri_f32(const struct ggml_compute_params * params, struct ggml_tensor * dst) {
+ const struct ggml_tensor * src0 = dst->src[0]; // A (lower triangular)
+ const struct ggml_tensor * src1 = dst->src[1]; // B (RHS)
+
+ GGML_TENSOR_BINARY_OP_LOCALS;
+
+ GGML_ASSERT(src0->type == GGML_TYPE_F32);
+ GGML_ASSERT(src1->type == GGML_TYPE_F32);
+ GGML_ASSERT(dst->type == GGML_TYPE_F32);
+
+ GGML_ASSERT(ne00 == ne01); // A must be square
+ GGML_ASSERT(ne0 == ne10); // solution cols == B cols
+ GGML_ASSERT(ne1 == ne11); // solution rows == B rows
+
+ GGML_ASSERT(ne02 == ne12 && ne12 == ne2);
+ GGML_ASSERT(ne03 == ne13 && ne13 == ne3);
+
+ const int ith = params->ith;
+ const int nth = params->nth;
+
+ const int64_t k = ne10; // number of RHS columns
+ const int64_t n = ne11; // A is n×n
+ const int64_t nr = ne02 * ne03 * k; // parallelize over columns: each work unit is one (seq, token, column) triple
+
+ // chunks per thread
+ const int64_t dr = (nr + nth - 1)/nth;
+
+ // chunk range for this thread
+ const int64_t ir0 = dr*ith;
+ const int64_t ir1 = MIN(ir0 + dr, nr);
+
+ const float * A = (const float *) src0->data; // [n, n, B1, B2]
+ const float * B = (const float *) src1->data; // [n, k, B1, B2]
+ float * X = ( float *) dst->data; // [n, k, B1, B2]
+
+ for (int64_t ir = ir0; ir < ir1; ++ir) {
+ const int64_t i03 = ir/(ne02*k);
+ const int64_t i02 = (ir - i03*ne02*k)/k;
+ const int64_t i01 = (ir - i03*ne02*k - i02*k);
+
+ const float * A_batch = A + i02 * nb02 / sizeof(float) + i03 * nb03 / sizeof(float);
+ const float * B_batch = B + i02 * nb12 / sizeof(float) + i03 * nb13 / sizeof(float);
+
+ float * X_batch = X + i02 * nb2 / sizeof(float) + i03 * nb3 / sizeof(float);
+
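+ // forward substitution for column i01: x[i] = (b[i] - sum_{t<i} A[i][t] * x[t]) / A[i][i]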
+ for (int64_t i00 = 0; i00 < n; ++i00) {
+ float sum = 0.0f;
+ for (int64_t t = 0; t < i00; ++t) {
+ sum += A_batch[i00 * n + t] * X_batch[t * k + i01];
+ }
+
+ const float diag = A_batch[i00 * n + i00];
+ GGML_ASSERT(diag != 0.0f && "Zero diagonal in triangular matrix");
+
+ X_batch[i00 * k + i01] = (B_batch[i00 * k + i01] - sum) / diag;
+ }
+ }
+}
+
+void ggml_compute_forward_solve_tri(const struct ggml_compute_params * params, struct ggml_tensor * dst) {
+ const ggml_tensor * src0 = dst->src[0];
+ const ggml_tensor * src1 = dst->src[1];
+
+ if (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F32) {
+ ggml_compute_forward_solve_tri_f32(params, dst);
+ } else {
+ GGML_ABORT("fatal error");
+ }
+}
+
// ggml_compute_forward_rwkv_wkv7
static void ggml_compute_forward_rwkv_wkv7_f32(
void ggml_compute_forward_acc(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_sum(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_sum_rows(const struct ggml_compute_params * params, struct ggml_tensor * dst);
+void ggml_compute_forward_cumsum(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_mean(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_argmax(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_count_equal(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_timestep_embedding(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_argsort(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_leaky_relu(const struct ggml_compute_params * params, struct ggml_tensor * dst);
+void ggml_compute_forward_tri(const struct ggml_compute_params * params, struct ggml_tensor * dst);
+void ggml_compute_forward_fill(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_flash_attn_ext(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_flash_attn_back(
const struct ggml_compute_params * params,
void ggml_compute_forward_add_rel_pos(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_rwkv_wkv6(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_rwkv_wkv7(const struct ggml_compute_params * params, struct ggml_tensor * dst);
+void ggml_compute_forward_solve_tri(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_gla(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_map_custom1(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_map_custom2(const struct ggml_compute_params * params, struct ggml_tensor * dst);
return logf(x);
}
+static inline float op_expm1(float x) {
+ return expm1f(x);
+}
+
+static inline float op_softplus(float x) {
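+ // for large x, log(1 + exp(x)) ~= x within float precision; the cutoff also avoids overflow in expf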
+ return (x > 20.0f) ? x : logf(1.0f + expf(x));
+}
+
static inline float op_floor(float x) {
return floorf(x);
}
unary_op<op_log>(params, dst);
}
+void ggml_compute_forward_expm1(const ggml_compute_params * params, ggml_tensor * dst) {
+ unary_op<op_expm1>(params, dst);
+}
+
+void ggml_compute_forward_softplus(const ggml_compute_params * params, ggml_tensor * dst) {
+ unary_op<op_softplus>(params, dst);
+}
+
void ggml_compute_forward_floor(const ggml_compute_params * params, ggml_tensor * dst) {
unary_op<op_floor>(params, dst);
}
void ggml_compute_forward_sin(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_cos(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_log(const struct ggml_compute_params * params, struct ggml_tensor * dst);
+void ggml_compute_forward_expm1(const struct ggml_compute_params * params, struct ggml_tensor * dst);
+void ggml_compute_forward_softplus(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_floor(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_ceil(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_round(const struct ggml_compute_params * params, struct ggml_tensor * dst);
#endif
}
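+
+// inclusive prefix sum: y[i] = x[0] + x[1] + ... + x[i]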
+inline static void ggml_vec_cumsum_f32(const int n, float * y, const float * x) {
+ float sum = 0.0f;
+ for (int i = 0; i < n; ++i) {
+ sum += x[i];
+ y[i] = sum;
+ }
+}
+
inline static void ggml_vec_sum_f32_ggf(const int n, ggml_float * s, const float * x) {
ggml_float sum = 0.0;
for (int i = 0; i < n; ++i) {
case GGML_UNARY_OP_TRUNC:
ggml_cuda_op_trunc(ctx, dst);
break;
+ case GGML_UNARY_OP_EXPM1:
+ ggml_cuda_op_expm1(ctx, dst);
+ break;
+ case GGML_UNARY_OP_SOFTPLUS:
+ ggml_cuda_op_softplus(ctx, dst);
+ break;
default:
return false;
}
case GGML_UNARY_OP_GELU_QUICK:
case GGML_UNARY_OP_TANH:
case GGML_UNARY_OP_EXP:
+ case GGML_UNARY_OP_EXPM1:
+ case GGML_UNARY_OP_SOFTPLUS:
case GGML_UNARY_OP_ELU:
case GGML_UNARY_OP_FLOOR:
case GGML_UNARY_OP_CEIL:
return logf(x);
}
+static __device__ __forceinline__ float op_expm1(float x) {
+ return expm1f(x);
+}
+
+static __device__ __forceinline__ float op_softplus(float x) {
+ return (x > 20.0f) ? x : logf(1.0f + expf(x));
+}
+
static __device__ __forceinline__ float op_elu(float x) {
return (x > 0.f) ? x : expm1f(x);
}
void ggml_cuda_op_trunc(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
ggml_cuda_op_unary<op_trunc>(ctx, dst);
}
+
+void ggml_cuda_op_expm1(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
+ ggml_cuda_op_unary<op_expm1>(ctx, dst);
+}
+
+void ggml_cuda_op_softplus(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
+ ggml_cuda_op_unary<op_softplus>(ctx, dst);
+}
/* gated ops */
template <float (*op)(float), typename T>
void ggml_cuda_op_log(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
+void ggml_cuda_op_expm1(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
+
+void ggml_cuda_op_softplus(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
+
void ggml_cuda_op_elu(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
void ggml_cuda_op_floor(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
}
}
-static inline float ggml_softplus(float input) {
+static inline float ggml_compute_softplus_f32(float input) {
return (input > 20.0f) ? input : logf(1 + expf(input));
}
//
"COS",
"SUM",
"SUM_ROWS",
+ "CUMSUM",
"MEAN",
"ARGMAX",
"COUNT_EQUAL",
"TIMESTEP_EMBEDDING",
"ARGSORT",
"LEAKY_RELU",
+ "TRI",
+ "FILL",
"FLASH_ATTN_EXT",
"FLASH_ATTN_BACK",
"RWKV_WKV6",
"GATED_LINEAR_ATTN",
"RWKV_WKV7",
+ "SOLVE_TRI",
"UNARY",
"GLU",
};
-static_assert(GGML_OP_COUNT == 90, "GGML_OP_COUNT != 90");
+static_assert(GGML_OP_COUNT == 94, "GGML_OP_COUNT != 94");
static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
"none",
"cos(x)",
"Σx",
"Σx_k",
+ "cumsum(x)",
"Σx/n",
"argmax(x)",
"count_equal(x)",
"timestep_embedding(timesteps, dim, max_period)",
"argsort(x)",
"leaky_relu(x)",
+ "tri(x)",
+ "fill(x, c)",
"flash_attn_ext(x)",
"flash_attn_back(x)",
"rwkv_wkv6(k, v, r, tf, td, s)",
"gated_linear_attn(k, v, q, gate, s)",
"rwkv_wkv7(r, w, k, v, a, b, s)",
+ "A X = B, A triangular, solve X",
"unary(x)",
"glu(x)",
};
-static_assert(GGML_OP_COUNT == 90, "GGML_OP_COUNT != 90");
+static_assert(GGML_OP_COUNT == 94, "GGML_OP_COUNT != 94");
static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
"HARDSWISH",
"HARDSIGMOID",
"EXP",
+ "EXPM1",
+ "SOFTPLUS",
"GELU_ERF",
"XIELU",
"FLOOR",
"TRUNC",
};
-static_assert(GGML_UNARY_OP_COUNT == 20, "GGML_UNARY_OP_COUNT != 20");
+static_assert(GGML_UNARY_OP_COUNT == 22, "GGML_UNARY_OP_COUNT != 22");
static const char * GGML_GLU_OP_NAME[GGML_GLU_OP_COUNT] = {
"REGLU",
return ggml_log_impl(ctx, a, true);
}
+struct ggml_tensor * ggml_expm1(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a) {
+ return ggml_unary(ctx, a, GGML_UNARY_OP_EXPM1);
+}
+
+struct ggml_tensor * ggml_expm1_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a) {
+ return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_EXPM1);
+}
+
+struct ggml_tensor * ggml_softplus(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a) {
+ return ggml_unary(ctx, a, GGML_UNARY_OP_SOFTPLUS);
+}
+
+struct ggml_tensor * ggml_softplus_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a) {
+ return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SOFTPLUS);
+}
+
// ggml_sin
static struct ggml_tensor * ggml_sin_impl(
return result;
}
+// ggml_cumsum
+
+struct ggml_tensor * ggml_cumsum(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a) {
+ GGML_ASSERT(a->type == GGML_TYPE_F32);
+
+ struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
+
+ result->op = GGML_OP_CUMSUM;
+ result->src[0] = a;
+
+ return result;
+}
+
// ggml_mean
struct ggml_tensor * ggml_mean(
struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
ggml_set_op_params_i32(result, 0, (int32_t) GGML_UNARY_OP_XIELU);
- ggml_set_op_params_f32(result, 1, beta + ggml_softplus(alpha_n));
- ggml_set_op_params_f32(result, 2, ggml_softplus(alpha_p));
+ ggml_set_op_params_f32(result, 1, beta + ggml_compute_softplus_f32(alpha_n));
+ ggml_set_op_params_f32(result, 2, ggml_compute_softplus_f32(alpha_p));
ggml_set_op_params_f32(result, 3, beta);
ggml_set_op_params_f32(result, 4, eps);
return result;
}
+// ggml_tri
+
+struct ggml_tensor * ggml_tri(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ enum ggml_tri_type type) {
+ GGML_ASSERT(a->type == GGML_TYPE_F32);
+
+ GGML_ASSERT(ggml_is_contiguous(a));
+ GGML_ASSERT(a->ne[0] == a->ne[1]);
+
+ struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
+
+ ggml_set_op_params_i32(result, 0, type);
+
+ result->op = GGML_OP_TRI;
+ result->src[0] = a;
+
+ return result;
+}
+
+// ggml_fill
+
+static struct ggml_tensor * ggml_fill_impl(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ float c,
+ bool inplace) {
+ GGML_ASSERT(a->type == GGML_TYPE_F32);
+ GGML_ASSERT(ggml_is_contiguous(a));
+
+ struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
+
+ ggml_set_op_params_f32(result, 0, c);
+
+ result->op = GGML_OP_FILL;
+ result->src[0] = a;
+
+ return result;
+}
+
+struct ggml_tensor * ggml_fill(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ float c) {
+ return ggml_fill_impl(ctx, a, c, false);
+}
+
+struct ggml_tensor * ggml_fill_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ float c) {
+ return ggml_fill_impl(ctx, a, c, true);
+}
+
// ggml_argsort
struct ggml_tensor * ggml_argsort(
return result;
}
+// ggml_solve_tri
+
+struct ggml_tensor * ggml_solve_tri(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b,
+ bool left,
+ bool lower,
+ bool uni) {
+ GGML_ASSERT(a->type == GGML_TYPE_F32);
+ GGML_ASSERT(b->type == GGML_TYPE_F32);
+
+ // A must be square; its triangularity is assumed, not checked
+ GGML_ASSERT(a->ne[0] == a->ne[1]);
+ // B must have the same number of rows as A
+ GGML_ASSERT(a->ne[1] == b->ne[1]);
+
+ // batch dimensions must be equal
+ GGML_ASSERT(a->ne[2] == b->ne[2]);
+ GGML_ASSERT(a->ne[3] == b->ne[3]);
+
+ GGML_ASSERT(ggml_is_contiguous(a));
+ GGML_ASSERT(ggml_is_contiguous(b));
+
+ GGML_ASSERT(lower && left && !uni); // TODO: support other variants
+
+ struct ggml_tensor * result = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, b->ne[0], b->ne[1], b->ne[2], b->ne[3]);
+
+ result->op = GGML_OP_SOLVE_TRI;
+ result->src[0] = a;
+ result->src[1] = b;
+
+ return result;
+}
+
////////////////////////////////////////////////////////////////////////////////
struct ggml_hash_set ggml_hash_set_new(size_t size) {
ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, tensor, grad));
}
} break;
+ case GGML_UNARY_OP_EXPM1: {
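+ // d/dx expm1(x) = exp(x)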
+ if (src0_needs_grads) {
+ ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_exp(ctx, src0)));
+ }
+ } break;
+ case GGML_UNARY_OP_SOFTPLUS: {
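+ // d/dx softplus(x) = sigmoid(x)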
+ if (src0_needs_grads) {
+ ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_sigmoid(ctx, src0)));
+ }
+ } break;
default: {
fprintf(stderr, "%s: unsupported unary op for backward pass: %s\n",
__func__, ggml_unary_op_name(ggml_get_unary_op(tensor)));