dst[i] = x[i] * y[i%ky];
}
-static const float GELU_COEF_A = 0.044715f;
-static const float SQRT_2_OVER_PI = 0.79788456080286535587989211986876f;
-
static __global__ void gelu_f32(const float * x, float * dst, const int k) {
+ const float GELU_COEF_A = 0.044715f;
+ const float SQRT_2_OVER_PI = 0.79788456080286535587989211986876f;
const int i = blockDim.x*blockIdx.x + threadIdx.x;
if (i >= k) {
const int64_t ne00 = src0->ne[0];
const int64_t i01_diff = i01_high - i01_low;
- const int64_t ne10 = src1->ne[0];
+ // const int64_t ne10 = src1->ne[0];
// compute
if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) {