case GGML_UNARY_OP_XIELU:
ggml_cuda_op_xielu(ctx, dst);
break;
+ case GGML_UNARY_OP_FLOOR:
+ ggml_cuda_op_floor(ctx, dst);
+ break;
+ case GGML_UNARY_OP_CEIL:
+ ggml_cuda_op_ceil(ctx, dst);
+ break;
+ case GGML_UNARY_OP_ROUND:
+ ggml_cuda_op_round(ctx, dst);
+ break;
+ case GGML_UNARY_OP_TRUNC:
+ ggml_cuda_op_trunc(ctx, dst);
+ break;
default:
return false;
}
case GGML_UNARY_OP_TANH:
case GGML_UNARY_OP_EXP:
case GGML_UNARY_OP_ELU:
+ case GGML_UNARY_OP_FLOOR:
+ case GGML_UNARY_OP_CEIL:
+ case GGML_UNARY_OP_ROUND:
+ case GGML_UNARY_OP_TRUNC:
return ggml_is_contiguous(op->src[0]);
default:
return false;
return (x > 0.f) ? x : expm1f(x);
}
+static __device__ __forceinline__ float op_floor(float x) {
+ return floorf(x);
+}
+
+static __device__ __forceinline__ float op_ceil(float x) {
+ return ceilf(x);
+}
+
+static __device__ __forceinline__ float op_round(float x) {
+ return roundf(x);
+}
+
+static __device__ __forceinline__ float op_trunc(float x) {
+ return truncf(x);
+}
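+
+// Semantics note: floorf/ceilf round toward -inf/+inf, truncf rounds toward
+// zero, and roundf rounds halfway cases away from zero (e.g. roundf(-2.5f) is
+// -3.0f while truncf(-2.5f) is -2.0f); rintf/nearbyintf would instead follow
+// the current floating-point rounding mode.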
+
template <float (*op)(float), typename T>
static __global__ void unary_op_kernel(const T * x, T * dst, const int k) {
const int i = blockDim.x*blockIdx.x + threadIdx.x;
void ggml_cuda_op_elu(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
ggml_cuda_op_unary<op_elu>(ctx, dst);
}
+
+void ggml_cuda_op_floor(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
+ ggml_cuda_op_unary<op_floor>(ctx, dst);
+}
+
+void ggml_cuda_op_ceil(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
+ ggml_cuda_op_unary<op_ceil>(ctx, dst);
+}
+
+void ggml_cuda_op_round(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
+ ggml_cuda_op_unary<op_round>(ctx, dst);
+}
+
+void ggml_cuda_op_trunc(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
+ ggml_cuda_op_unary<op_trunc>(ctx, dst);
+}
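+
+// A minimal sketch of how these wrappers are expected to be reached from the
+// graph API, assuming ggml.h exposes builders named ggml_floor/ggml_ceil/
+// ggml_round/ggml_trunc for the new GGML_UNARY_OP_* values (the builder names
+// are an assumption here, mirroring the other unary ops):
+//
+//   struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 16);
+//   struct ggml_tensor * b = ggml_floor(ctx, a); // -> GGML_UNARY_OP_FLOOR
+//
+// The CUDA backend then dispatches through the unary switch in ggml-cuda.cu to
+// ggml_cuda_op_floor(), which launches unary_op_kernel<op_floor> over the
+// contiguous src tensor.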
/* gated ops */
template <float (*op)(float), typename T>
void ggml_cuda_op_elu(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
+
+void ggml_cuda_op_floor(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
+
+void ggml_cuda_op_ceil(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
+
+void ggml_cuda_op_round(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
+
+void ggml_cuda_op_trunc(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
+
void ggml_cuda_op_reglu(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
void ggml_cuda_op_geglu(ggml_backend_cuda_context & ctx, ggml_tensor * dst);