From: Neo Zhang Date: Thu, 29 Jan 2026 01:20:22 +0000 (+0800) Subject: sycl: fix norm kernels: l2_norm, group_norm, rms_norm by remove assert to support... X-Git-Tag: v0.9.6~14 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=bc269379cba9088a92cb6b7985b4c67e0199eb2f;p=pkg%2Fggml%2Fsources%2Fggml sycl: fix norm kernels: l2_norm, group_norm, rms_norm by remove assert to support more cases (llama/19154) Co-authored-by: Neo Zhang Jianyu --- diff --git a/src/ggml-sycl/ggml-sycl.cpp b/src/ggml-sycl/ggml-sycl.cpp index ce2f0d41..3a4c092a 100644 --- a/src/ggml-sycl/ggml-sycl.cpp +++ b/src/ggml-sycl/ggml-sycl.cpp @@ -4606,14 +4606,12 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g return (op->type == GGML_TYPE_F32 && op->src[0]->type == GGML_TYPE_F32) && (op->type == op->src[0]->type); #endif case GGML_OP_NORM: - return true; case GGML_OP_L2_NORM: case GGML_OP_GROUP_NORM: - return ggml_is_contiguous(op->src[0]); case GGML_OP_RMS_NORM: - return ((op->src[0]->ne[0] % WARP_SIZE) == 0); + return true; case GGML_OP_RMS_NORM_BACK: - return ((op->src[0]->ne[0] % WARP_SIZE) == 0); + return ggml_is_contiguous(op->src[0]); case GGML_OP_SCALE: return true; case GGML_OP_CONT: diff --git a/src/ggml-sycl/norm.cpp b/src/ggml-sycl/norm.cpp index 823d3a48..00702b5d 100644 --- a/src/ggml-sycl/norm.cpp +++ b/src/ggml-sycl/norm.cpp @@ -251,7 +251,6 @@ static void norm_f32_sycl(const float * x, float * dst, const int ncols, const i const float eps, queue_ptr stream, int device) { const sycl::range<3> global_dims(nsamples, nchannels, nrows); - GGML_ASSERT(ncols % WARP_SIZE == 0); if (ncols < 1024) { const sycl::range<3> block_dims(1, 1, WARP_SIZE); stream->submit([&](sycl::handler& cgh) { @@ -334,7 +333,6 @@ static void group_norm_f32_sycl(const float* x, float* dst, static void rms_norm_f32_sycl(const float* x, float* dst, const int ncols, const int nrows, const int nchannels, const int nsamples, const int64_t stride_row, const int64_t stride_channel, const int64_t stride_sample, const float eps, queue_ptr stream, int device) { - GGML_ASSERT(ncols % WARP_SIZE == 0); // printf("%s ncols=%d, nrows=%d, WARP_SIZE=%d\n", __func__, ncols, nrows, WARP_SIZE); const sycl::range<3> global_dims(nsamples, nchannels, nrows); @@ -374,7 +372,6 @@ static void rms_norm_f32_sycl(const float* x, float* dst, const int ncols, const static void l2_norm_f32_sycl(const float* x, float* dst, const int ncols, const int nrows, const float eps, queue_ptr stream, int device) { - GGML_ASSERT(ncols % WARP_SIZE == 0); // printf("%s ncols=%d, nrows=%d, WARP_SIZE=%d\n", __func__, ncols, nrows, WARP_SIZE); if (ncols < 1024) { const sycl::range<3> block_dims(1, 1, WARP_SIZE);