From: Johannes Gäßler <redacted>
Date: Fri, 20 Sep 2024 16:35:35 +0000 (+0200)
Subject: CUDA: fix sum.cu compilation for CUDA < 11.7 (llama/9562)
X-Git-Tag: upstream/0.0.1642~360
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=b1d72c6006ab3e6e19dce044921c573f375c0f8f;p=pkg%2Fggml%2Fsources%2Fggml

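CUDA: fix sum.cu compilation for CUDA < 11.7 (llama/9562)

Introduce a USE_CUB macro that is defined only for builds without
GGML_USE_HIPBLAS and GGML_USE_MUSA and with CUDART_VERSION >= 11700.
Both the <cub/cub.cuh> include and the DeviceReduce::Sum path in
sum_f32_cuda are now guarded by USE_CUB instead of by the HIP/MUSA
check alone, so older CUDA toolkits no longer compile the CUB code.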
---

diff --git a/src/ggml-cuda/sum.cu b/src/ggml-cuda/sum.cu
index 21da6350..0583e4fe 100644
--- a/src/ggml-cuda/sum.cu
+++ b/src/ggml-cuda/sum.cu
@@ -1,9 +1,13 @@
-#if !defined(GGML_USE_HIPBLAS) && !defined(GGML_USE_MUSA)
+#if !defined(GGML_USE_HIPBLAS) && !defined(GGML_USE_MUSA) && CUDART_VERSION >= 11700
+#define USE_CUB
+#endif // !defined(GGML_USE_HIPBLAS) && !defined(GGML_USE_MUSA) && CUDART_VERSION >= 11700
+
+#ifdef USE_CUB
 // On Windows CUB uses libraries with variables called CC_PASCAL which conflict with the define in common.cuh.
 // For this reason CUB must be included BEFORE anything else.
 #include <cub/cub.cuh>
 using namespace cub;
-#endif // !defined(GGML_USE_HIPBLAS) && !defined(GGML_USE_MUSA)
+#endif // USE_CUB
 
 #include "sumrows.cuh"
 #include "sum.cuh"
@@ -11,7 +15,7 @@ using namespace cub;
 #include <cstdint>
 
 void sum_f32_cuda(ggml_cuda_pool & pool, const float * x, float * dst, const int64_t ne, cudaStream_t stream) {
-#if !defined(GGML_USE_HIPBLAS) && !defined(GGML_USE_MUSA)
+#ifdef USE_CUB
     size_t tmp_size = 0;
     DeviceReduce::Sum(nullptr,       tmp_size, x, dst, ne, stream);
     ggml_cuda_pool_alloc<uint8_t> tmp_alloc(pool, tmp_size);
@@ -21,7 +25,7 @@ void sum_f32_cuda(ggml_cuda_pool & pool, const float * x, float * dst, const int
     // For AMD there is rocPRIM which could be used as a drop-in replacement via hipcub but this would require C++11 -> C++14.
     sum_rows_f32_cuda(x, dst, ne, 1, stream);
     GGML_UNUSED(pool);
-#endif // !defined(GGML_USE_HIPBLAS) && !defined(GGML_USE_MUSA)
+#endif // USE_CUB
 }
 
 void ggml_cuda_op_sum(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {