From: Johannes Gäßler Date: Fri, 20 Sep 2024 16:35:35 +0000 (+0200) Subject: CUDA: fix sum.cu compilation for CUDA < 11.7 (llama/9562) X-Git-Tag: upstream/0.0.1642~360 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=b1d72c6006ab3e6e19dce044921c573f375c0f8f;p=pkg%2Fggml%2Fsources%2Fggml CUDA: fix sum.cu compilation for CUDA < 11.7 (llama/9562) --- diff --git a/src/ggml-cuda/sum.cu b/src/ggml-cuda/sum.cu index 21da6350..0583e4fe 100644 --- a/src/ggml-cuda/sum.cu +++ b/src/ggml-cuda/sum.cu @@ -1,9 +1,13 @@ -#if !defined(GGML_USE_HIPBLAS) && !defined(GGML_USE_MUSA) +#if !defined(GGML_USE_HIPBLAS) && !defined(GGML_USE_MUSA) && CUDART_VERSION >= 11700 +#define USE_CUB +#endif // !defined(GGML_USE_HIPBLAS) && !defined(GGML_USE_MUSA) && CUDART_VERSION >= 11700 + +#ifdef USE_CUB // On Windows CUB uses libraries with variables called CC_PASCAL which conflict with the define in common.cuh. // For this reason CUB must be included BEFORE anything else. #include using namespace cub; -#endif // !defined(GGML_USE_HIPBLAS) && !defined(GGML_USE_MUSA) +#endif // USE_CUB #include "sumrows.cuh" #include "sum.cuh" @@ -11,7 +15,7 @@ using namespace cub; #include void sum_f32_cuda(ggml_cuda_pool & pool, const float * x, float * dst, const int64_t ne, cudaStream_t stream) { -#if !defined(GGML_USE_HIPBLAS) && !defined(GGML_USE_MUSA) +#ifdef USE_CUB size_t tmp_size = 0; DeviceReduce::Sum(nullptr, tmp_size, x, dst, ne, stream); ggml_cuda_pool_alloc tmp_alloc(pool, tmp_size); @@ -21,7 +25,7 @@ void sum_f32_cuda(ggml_cuda_pool & pool, const float * x, float * dst, const int // For AMD there is rocPRIM which could be used as a drop-in replacement via hipcub but this would require C++11 -> C++14. sum_rows_f32_cuda(x, dst, ne, 1, stream); GGML_UNUSED(pool); -#endif // !defined(GGML_USE_HIPBLAS) && !defined(GGML_USE_MUSA) +#endif // USE_CUB } void ggml_cuda_op_sum(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {