From: Daniele
Date: Fri, 5 Jul 2024 07:06:09 +0000 (+0000)
Subject: CUDA: revert part of the RDNA1 optimizations (llama/8309)
X-Git-Tag: upstream/1.7.4~589
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=73703a144fd9d14c104932813898352549acd817;p=pkg%2Fggml%2Fsources%2Fwhisper.cpp

CUDA: revert part of the RDNA1 optimizations (llama/8309)

The launch_bounds change was causing a small performance drop of 25 t/s in perplexity runs.
---

diff --git a/ggml/src/ggml-cuda/mmq.cuh b/ggml/src/ggml-cuda/mmq.cuh
index a97afc7a..fca93618 100644
--- a/ggml/src/ggml-cuda/mmq.cuh
+++ b/ggml/src/ggml-cuda/mmq.cuh
@@ -2263,9 +2263,9 @@ static __device__ void mul_mat_q_process_tile(
 template
 #if defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)
-#if defined(RDNA3) || defined(RDNA2) || defined(RDNA1)
+#if defined(RDNA3) || defined(RDNA2)
 __launch_bounds__(WARP_SIZE*nwarps, 2)
-#endif // defined(RDNA3) || defined(RDNA2) || defined(RDNA1)
+#endif // defined(RDNA3) || defined(RDNA2)
 #else
 #if __CUDA_ARCH__ >= CC_VOLTA
 __launch_bounds__(WARP_SIZE*nwarps, 1)