From: Konstantin Zhuravlyov
Date: Sun, 7 Jan 2024 06:52:42 +0000 (-0500)
Subject: ggml : use __builtin_amdgcn_sudot4 in __dp4a for gfx11 (llama/4787)
X-Git-Tag: upstream/1.7.4~1156
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=2865e4710bba1db412a96638c347401e9799e9f3;p=pkg%2Fggml%2Fsources%2Fwhisper.cpp

ggml : use __builtin_amdgcn_sudot4 in __dp4a for gfx11 (llama/4787)
---

diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index 7578d21c..55f385b5 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -183,7 +183,7 @@ static __device__ __forceinline__ int __vsubss4(const int a, const int b) {
 static __device__ __forceinline__ int __dp4a(const int a, const int b, int c) {
 #if defined(__gfx906__) || defined(__gfx908__) || defined(__gfx90a__) || defined(__gfx1030__)
     c = __builtin_amdgcn_sdot4(a, b, c, false);
-#elif defined(__gfx1100__)
+#elif defined(RDNA3)
     c = __builtin_amdgcn_sudot4( true, a, true, b, c, false);
 #elif defined(__gfx1010__) || defined(__gfx900__)
     int tmp1;