CUDA: revert part of the RDNA1 optimizations (llama/8309)

author Daniele <redacted>

Fri, 5 Jul 2024 07:06:09 +0000 (07:06 +0000)

committer Georgi Gerganov <redacted>

Mon, 8 Jul 2024 10:03:28 +0000 (13:03 +0300)
author Daniele <redacted>
Fri, 5 Jul 2024 07:06:09 +0000 (07:06 +0000)
committer Georgi Gerganov <redacted>
Mon, 8 Jul 2024 10:03:28 +0000 (13:03 +0300)
diff --git a/src/ggml-cuda/mmq.cuh b/src/ggml-cuda/mmq.cuh
index a97afc7ac80aa81b56a525811e7c7152661ccaf7..fca93618dc754b139a1b6a9e0db5a1e1964caacd 100644
--- a/src/ggml-cuda/mmq.cuh
+++ b/src/ggml-cuda/mmq.cuh
@@ -2263,9 +2263,9 @@ static __device__ void mul_mat_q_process_tile(
  
  template <ggml_type type, int mmq_x, int nwarps, bool need_check>
  #if defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)
-#if defined(RDNA3) || defined(RDNA2) || defined(RDNA1)
+#if defined(RDNA3) || defined(RDNA2)
      __launch_bounds__(WARP_SIZE*nwarps, 2)
-#endif // defined(RDNA3) || defined(RDNA2) || defined(RDNA1)
+#endif // defined(RDNA3) || defined(RDNA2)
  #else
  #if __CUDA_ARCH__ >= CC_VOLTA
      __launch_bounds__(WARP_SIZE*nwarps, 1)
author	Daniele <redacted>
	Fri, 5 Jul 2024 07:06:09 +0000 (07:06 +0000)
committer	Georgi Gerganov <redacted>
	Mon, 8 Jul 2024 10:03:28 +0000 (13:03 +0300)