    dst[i] = x[i] * (static_cast<T>(1.0f) / (static_cast<T>(1.0f) + sycl::native::exp(GELU_QUICK_COEF * x[i])));
}
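+
+// GELU via the erf formulation: 0.5 * x * (1 + erf(x / sqrt(2))),
+// evaluated with a grid-stride loop so a fixed launch covers all k elements.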
+template<typename T>
+static void gelu_erf(const T * x, T * dst, const int k, const sycl::nd_item<3> &item_ct1) {
+    const T SQRT_2_INV = static_cast<T>(0.70710678118654752440084436210484f);
+    for (auto i = item_ct1.get_global_id(2); i < static_cast<size_t>(k); i += item_ct1.get_global_range(2)) {
+        auto x_i = x[i];
+        dst[i] = static_cast<T>(0.5f) * x_i * (static_cast<T>(1.0f) + sycl::erf(x_i * SQRT_2_INV));
+    }
+}
+
template<typename T>
static void tanh(const T *x, T *dst, int k,
                 const sycl::nd_item<3> &item_ct1) {
        });
}
+
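+// Host-side launcher: rounds k up to whole work-groups of SYCL_GELU_BLOCK_SIZE
+// and submits the gelu_erf kernel on the given SYCL queue.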
+template<typename T>
+static void gelu_erf_sycl(const T *x, T *dst, const int k,
+                          queue_ptr stream) {
+    const int num_blocks = ceil_div(k, SYCL_GELU_BLOCK_SIZE);
+    stream->parallel_for(
+        sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) *
+                              sycl::range<3>(1, 1, SYCL_GELU_BLOCK_SIZE),
+                          sycl::range<3>(1, 1, SYCL_GELU_BLOCK_SIZE)),
+        [=](sycl::nd_item<3> item_ct1) {
+            gelu_erf(x, dst, k, item_ct1);
+        });
+}
+
template<typename T>
static void tanh_sycl(const T *x, T *dst, const int k,
                      queue_ptr stream) {
    }
}
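+
+// Checks that src[0] and dst share a supported type (F32 always, F16 only when
+// GGML_SYCL_F16 is enabled) and runs the GELU-erf kernel over all elements.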
+inline void ggml_sycl_op_gelu_erf(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
+#if defined (GGML_SYCL_F16)
+    GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16);
+    GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16);
+#else
+    GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
+    GGML_ASSERT(dst->type == GGML_TYPE_F32);
+#endif
+    GGML_ASSERT(dst->src[0]->type == dst->type);
+    dpct::queue_ptr main_stream = ctx.stream();
+    SYCL_CHECK(ggml_sycl_set_device(ctx.device));
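+    // Dispatch on the tensor element type; cast_data<T> yields typed
+    // source/destination pointers for the kernel launch.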
+    switch (dst->type) {
+#if defined (GGML_SYCL_F16)
+        case GGML_TYPE_F16:
+            {
+                auto data_pts = cast_data<sycl::half>(dst);
+                gelu_erf_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream);
+                break;
+            }
+#endif
+        case GGML_TYPE_F32:
+            {
+                auto data_pts = cast_data<float>(dst);
+                gelu_erf_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream);
+                break;
+            }
+        default:
+            GGML_ABORT("GGML tensor type not supported!\n");
+    }
+}
+
+
inline void ggml_sycl_op_tanh(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
#if defined (GGML_SYCL_F16)
    GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16);
    ggml_sycl_op_gelu_quick(ctx, dst);
}
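+
+// Public wrapper mirroring the other unary ops (see ggml_sycl_tanh below).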
+void ggml_sycl_gelu_erf(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
+    ggml_sycl_op_gelu_erf(ctx, dst);
+}
+
void ggml_sycl_tanh(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
    ggml_sycl_op_tanh(ctx, dst);