Define and optimize RDNA1 (llama/8085)

author Daniele <redacted>

Wed, 3 Jul 2024 23:02:58 +0000 (23:02 +0000)

committer Georgi Gerganov <redacted>

Mon, 8 Jul 2024 10:03:28 +0000 (13:03 +0300)
author Daniele <redacted>
Wed, 3 Jul 2024 23:02:58 +0000 (23:02 +0000)
committer Georgi Gerganov <redacted>
Mon, 8 Jul 2024 10:03:28 +0000 (13:03 +0300)
diff --git a/src/ggml-cuda/common.cuh b/src/ggml-cuda/common.cuh

index 472f4ace1c2ad225e34adcaa86cd01b751677689..4ff06b8719d378e89d1d099723cb31e50224747e 100644 (file)
--- a/src/ggml-cuda/common.cuh
+++ b/src/ggml-cuda/common.cuh
@@ -227,6 +227,10 @@ typedef float2 dfloat2;
  #define RDNA2
  #endif
  
+#if defined(__gfx1010__) || defined(__gfx1012__)
+#define RDNA1
+#endif
+
  #ifndef __has_builtin
      #define __has_builtin(x) 0
  #endif
diff --git a/src/ggml-cuda/mmq.cuh b/src/ggml-cuda/mmq.cuh

index 1396e7a753ac34175c93faac19459fc38868c44b..deaed066f7c908c6e088e247068f0d55df28d84b 100644 (file)
--- a/src/ggml-cuda/mmq.cuh
+++ b/src/ggml-cuda/mmq.cuh
@@ -60,12 +60,16 @@ static constexpr __device__ int get_mmq_x_max_device() {
  }
  
  static constexpr int get_mmq_y_host(const int cc) {
-    return int8_mma_available(cc) || cc >= CC_VOLTA ? 128 : 64;
+    return cc >= CC_OFFSET_AMD ? (cc == CC_RDNA1 ? 64 : 128) : (cc >= CC_VOLTA ? 128 : 64);
  }
  
  static constexpr __device__ int get_mmq_y_device() {
  #if defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)
+#if defined(RDNA1)
+    return 64;
+#else
      return 128;
+#endif // defined RDNA1
  #else
  #if __CUDA_ARCH__ >= CC_VOLTA
      return 128;
@@ -2259,9 +2263,9 @@ static __device__ void mul_mat_q_process_tile(
  
  template <ggml_type type, int mmq_x, int nwarps, bool need_check>
  #if defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)
-#if defined(RDNA3) || defined(RDNA2)
+#if defined(RDNA3) || defined(RDNA2) || defined(RDNA1)
      __launch_bounds__(WARP_SIZE*nwarps, 2)
-#endif // defined(RDNA3) || defined(RDNA2)
+#endif // defined(RDNA3) || defined(RDNA2) || defined(RDNA1)
  #else
  #if __CUDA_ARCH__ >= CC_VOLTA
      __launch_bounds__(WARP_SIZE*nwarps, 1)
author	Daniele <redacted>
	Wed, 3 Jul 2024 23:02:58 +0000 (23:02 +0000)
committer	Georgi Gerganov <redacted>
	Mon, 8 Jul 2024 10:03:28 +0000 (13:03 +0300)
src/ggml-cuda/common.cuh		patch \| blob \| history
src/ggml-cuda/mmq.cuh		patch \| blob \| history