case GGML_UNARY_OP_EXP:
ggml_cuda_op_exp(ctx, dst);
break;
+ case GGML_UNARY_OP_ELU:
+ ggml_cuda_op_elu(ctx, dst);
+ break;
default:
return false;
}

case GGML_UNARY_OP_GELU_QUICK:
case GGML_UNARY_OP_TANH:
case GGML_UNARY_OP_EXP:
+ case GGML_UNARY_OP_ELU:
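+ // these unary ops run through the generic unary kernel, which indexes src0 as a flat 1-D buffer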
return ggml_is_contiguous(op->src[0]);
default:
return false;

return logf(x);
}
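+
+// ELU with alpha = 1: identity for x > 0, expm1f(x) = exp(x) - 1 otherwise (numerically stable near 0)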
+static __device__ __forceinline__ float op_elu(float x) {
+ return (x > 0.f) ? x : expm1f(x);
+}
+
template <float (*op)(float), typename T>
static __global__ void unary_op_kernel(const T * x, T * dst, const int k) {
const int i = blockDim.x*blockIdx.x + threadIdx.x;

ggml_cuda_op_unary<op_log>(ctx, dst);
}
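+
+// host-side wrapper: forwards to the shared unary launcher (contiguous F32/F16 tensors)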
+void ggml_cuda_op_elu(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
+ ggml_cuda_op_unary<op_elu>(ctx, dst);
+}
/* gated ops */
template <float (*op)(float), typename T>

void ggml_cuda_op_log(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
+void ggml_cuda_op_elu(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
+
void ggml_cuda_op_reglu(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
void ggml_cuda_op_geglu(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
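
Not part of the diff above: a minimal smoke-test sketch showing how the new path is reached through the public ggml API, assuming a CUDA-enabled build. `ggml_elu` creates the unary node; with the changes above, the backend's `supports_op` now accepts it and `ggml_cuda_op_elu` runs it on the GPU. The `main` harness, buffer sizes, and test values below are illustrative only, not part of the PR.

#include <stdio.h>
#include <math.h>

#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"
#include "ggml-cuda.h"

int main(void) {
    ggml_backend_t backend = ggml_backend_cuda_init(0 /* device */);
    if (backend == NULL) {
        fprintf(stderr, "failed to init CUDA backend\n");
        return 1;
    }

    // metadata-only context: tensor data lives in the backend buffer
    struct ggml_init_params params = {
        /*.mem_size   =*/ ggml_tensor_overhead()*8 + ggml_graph_overhead(),
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ true,
    };
    struct ggml_context * ctx = ggml_init(params);

    struct ggml_tensor * x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
    struct ggml_tensor * y = ggml_elu(ctx, x); // dispatched to ggml_cuda_op_elu

    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, y);

    ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors(ctx, backend);

    const float in[8] = { -4.0f, -2.0f, -1.0f, -0.5f, 0.0f, 0.5f, 1.0f, 2.0f };
    ggml_backend_tensor_set(x, in, 0, sizeof(in));

    ggml_backend_graph_compute(backend, gf);

    float out[8];
    ggml_backend_tensor_get(y, out, 0, sizeof(out));

    for (int i = 0; i < 8; ++i) {
        const float ref = in[i] > 0.0f ? in[i] : expm1f(in[i]); // CPU reference, matches op_elu
        printf("elu(%5.2f) = %8.5f (ref %8.5f)\n", in[i], out[i], ref);
    }

    ggml_backend_buffer_free(buf);
    ggml_free(ctx);
    ggml_backend_free(backend);
    return 0;
}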