git.djapps.eu Git - pkg/ggml/sources/whisper.cpp/commitdiff
ggml : use __builtin_amdgcn_sudot4 in __dp4a for gfx11 (llama/4787)
author Konstantin Zhuravlyov <redacted>
Sun, 7 Jan 2024 06:52:42 +0000 (01:52 -0500)
committer Georgi Gerganov <redacted>
Thu, 11 Jan 2024 19:50:00 +0000 (21:50 +0200)
ggml-cuda.cu

index 7578d21c6cf140bdae12951d1370b3d4d18557d0..55f385b56ccf4d22fbee3da5871db5be90c9c1bb 100644 (file)
@@ -183,7 +183,7 @@ static __device__ __forceinline__ int __vsubss4(const int a, const int b) {
 static __device__ __forceinline__ int __dp4a(const int a, const int b, int c) {
 #if defined(__gfx906__) || defined(__gfx908__) || defined(__gfx90a__) || defined(__gfx1030__)
     c = __builtin_amdgcn_sdot4(a, b, c, false);
-#elif defined(__gfx1100__)
+#elif defined(RDNA3)
     c = __builtin_amdgcn_sudot4( true, a, true, b, c, false);
 #elif defined(__gfx1010__) || defined(__gfx900__)
     int tmp1;