#include "common.cuh"
#include "fattn-common.cuh"
+// Currently LLVM with the amdgcn target does not support unrolling loops
+// that contain a break that cannot be resolved at compile time.
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wpass-failed"
+#endif // __clang__
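+// Illustrative sketch (hypothetical names, not code from this kernel) of the
+// pattern that trips the warning: under #pragma unroll, a break that depends
+// on a run-time value makes full unrolling impossible, and clang reports the
+// failed transformation via -Wpass-failed:
+//
+//   #pragma unroll
+//   for (int j = 0; j < ncols; ++j) {
+//       if (first_row + j >= n_rows) { // run-time bound
+//           break;                     // cannot be resolved at compile time
+//       }
+//       // ... per-column work ...
+//   }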
template<int D, int ncols, ggml_type type_K, ggml_type type_V, bool use_logit_softcap> // D == head size
#ifndef GGML_USE_HIP
__launch_bounds__(D, 1)
NO_DEVICE_CODE;
#endif // defined(FLASH_ATTN_AVAILABLE) && defined(FP16_AVAILABLE)
}
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif // __clang__
template <int D, int cols_per_block, ggml_type type_K, ggml_type type_V, bool use_logit_softcap>
void ggml_cuda_flash_attn_ext_vec_f16_case_impl(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
#include "common.cuh"
#include "fattn-common.cuh"
+// Currently LLVM with the amdgcn target does not support unrolling loops
+// that contain a break that cannot be resolved at compile time.
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wpass-failed"
+#endif // __clang__
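+// Note: the push/pop pair keeps the suppression scoped; -Wpass-failed is
+// ignored only for code between the push above and the matching pop below,
+// so diagnostics elsewhere in the file are unaffected. Schematically:
+//
+//   #pragma clang diagnostic push
+//   #pragma clang diagnostic ignored "-Wpass-failed"
+//   /* kernel definition: unroll-failure warnings suppressed here */
+//   #pragma clang diagnostic pop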
template<int D, int ncols, ggml_type type_K, ggml_type type_V, bool use_logit_softcap> // D == head size
#ifndef GGML_USE_HIP
__launch_bounds__(D, 1)
NO_DEVICE_CODE;
#endif // FLASH_ATTN_AVAILABLE
}
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif // __clang__
template <int D, int cols_per_block, ggml_type type_K, ggml_type type_V, bool use_logit_softcap>
void ggml_cuda_flash_attn_ext_vec_f32_case_impl(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {