From: Jiahao Li Date: Tue, 11 Jul 2023 18:12:57 +0000 (+0800) Subject: ggml : use a single kernel for CUDA mul op (#373) X-Git-Tag: upstream/0.0.1642~1331 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=f5165d0392da6013160971534a5cc60643fa2d37;p=pkg%2Fggml%2Fsources%2Fggml ggml : use a single kernel for CUDA mul op (#373) --- diff --git a/src/ggml-cuda.cu b/src/ggml-cuda.cu index 17c8bb76..2fb30c6e 100644 --- a/src/ggml-cuda.cu +++ b/src/ggml-cuda.cu @@ -2305,20 +2305,11 @@ inline void ggml_cuda_op_mul( GGML_ASSERT(dst_ddf_i != nullptr); const int64_t ne00 = src0->ne[0]; + const int64_t i01_diff = i01_high - i01_low; const int64_t ne10 = src1->ne[0]; - const int64_t ne11 = src1->ne[1]; - - for (int64_t i01 = i01_low; i01 < i01_high; i01++) { - const int64_t i11 = i1*ne11 + i01%ne11; // broadcast src1 across src0 - float * src0_ddf_i01 = src0_ddf_i + i01*ne00; - float * src1_ddf_i01 = src1_ddf_i + i11*ne10; - float * dst_ddf_i01 = dst_ddf_i + i01*ne00; - - // compute - mul_f32_cuda(src0_ddf_i01, src1_ddf_i01, dst_ddf_i01, ne00, ne10, cudaStream_main); - } + mul_f32_cuda(src0_ddf_i, src1_ddf_i, dst_ddf_i, ne00*i01_diff, ne10, cudaStream_main); (void) dst; (void) src0_ddq_i;