HIP: bump requirement to ROCm 6.1 (llama/15296)

author uvos <redacted>

Wed, 13 Aug 2025 18:44:30 +0000 (20:44 +0200)

committer Georgi Gerganov <redacted>

Thu, 14 Aug 2025 11:17:28 +0000 (14:17 +0300)
author uvos <redacted>
Wed, 13 Aug 2025 18:44:30 +0000 (20:44 +0200)
committer Georgi Gerganov <redacted>
Thu, 14 Aug 2025 11:17:28 +0000 (14:17 +0300)
diff --git a/src/ggml-cuda/common.cuh b/src/ggml-cuda/common.cuh

index 5a2a3478d26502e6c32e83a1f7120d50fa1fdb14..2b14b30ac90f336c291b51070e824638ed98231c 100644 (file)
--- a/src/ggml-cuda/common.cuh
+++ b/src/ggml-cuda/common.cuh
@@ -464,25 +464,21 @@ static __device__ __forceinline__ half ggml_cuda_hmax(const half a, const half b
  }
  
  static __device__ __forceinline__ half2 ggml_cuda_hmax2(const half2 a, const half2 b) {
-#if defined(GGML_USE_HIP) && HIP_VERSION >= 50700000
+#if defined(GGML_USE_HIP)
      return half2(__hmax(a.x, b.x), __hmax(a.y, b.y));
-#elif !defined(GGML_USE_HIP) && CUDART_VERSION >= CUDART_HMAX
+#elif CUDART_VERSION >= CUDART_HMAX
      return __hmax2(a, b);
-#elif !defined(GGML_USE_HIP)
+#else
      half2 ret;
      reinterpret_cast<half&>(ret.x) = __float2half(fmaxf( __low2float(a),  __low2float(b)));
      reinterpret_cast<half&>(ret.y) = __float2half(fmaxf(__high2float(a), __high2float(b)));
      return ret;
-#else
-    GGML_UNUSED(a);
-    GGML_UNUSED(b);
-    NO_DEVICE_CODE;
  #endif
  }
  
  template<int width = WARP_SIZE>
  static __device__ __forceinline__ half2 warp_reduce_max(half2 x) {
-#if !defined(GGML_USE_HIP) && __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL || (defined(GGML_USE_HIP) && HIP_VERSION >= 50700000)
+#if !defined(GGML_USE_HIP) && __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL || defined(GGML_USE_HIP)
  #pragma unroll
     for (int offset = width/2; offset > 0; offset >>= 1) {
         x = ggml_cuda_hmax2(x, __shfl_xor_sync(0xffffffff, x, offset, width));
@@ -491,7 +487,7 @@ static __device__ __forceinline__ half2 warp_reduce_max(half2 x) {
  #else
     GGML_UNUSED(x);
     NO_DEVICE_CODE;
-#endif // !defined(GGML_USE_HIP) && __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL || (defined(GGML_USE_HIP) && HIP_VERSION >= 50700000)
+#endif // !defined(GGML_USE_HIP) && __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL || defined(GGML_USE_HIP)
  }
  
  #if CUDART_VERSION < CUDART_HMASK
diff --git a/src/ggml-cuda/ggml-cuda.cu b/src/ggml-cuda/ggml-cuda.cu

index d9110491ec78c42235f780cd97623a587061cfcd..0d92901cb21421e15f06cdaf74a788d5234d15ab 100644 (file)
--- a/src/ggml-cuda/ggml-cuda.cu
+++ b/src/ggml-cuda/ggml-cuda.cu
@@ -180,30 +180,6 @@ static int ggml_cuda_parse_id(char devName[]) {
  #endif // defined(GGML_USE_HIP)
  
  static ggml_cuda_device_info ggml_cuda_init() {
-#if defined(GGML_USE_HIP)
-    // Workaround for a rocBLAS bug when using multiple graphics cards:
-    // https://github.com/ROCmSoftwarePlatform/rocBLAS/issues/1346
-    {
-        int major_version = 0;
-        size_t version_length = 0;
-        if (rocblas_get_version_string_size(&version_length) == rocblas_status_success) {
-            std::vector<char> version(version_length+1, '\0');
-            if (rocblas_get_version_string(version.data(), version.size()) == rocblas_status_success) {
-                version.resize(::strlen(version.data()));
-                int parsed_value = 0;
-                if (std::from_chars(version.data(), version.data() + version.size(), parsed_value).ec == std::errc()) {
-                    major_version = parsed_value;
-                }
-            }
-        }
-        if (major_version < 4) {
-            GGML_LOG_DEBUG(GGML_CUDA_NAME " calling rocblas_initialize as a workaround for a rocBLAS bug\n");
-            rocblas_initialize();
-            CUDA_CHECK(cudaDeviceSynchronize());
-        }
-    }
-#endif
-
      ggml_cuda_device_info info = {};
  
      cudaError_t err = cudaGetDeviceCount(&info.device_count);
diff --git a/src/ggml-cuda/vendors/hip.h b/src/ggml-cuda/vendors/hip.h

index 96f8bc75e9643b9097b85cf02bcb91050e60602a..ec1b59caafc9aa98541fc8638b5b203193ec9ce0 100644 (file)
--- a/src/ggml-cuda/vendors/hip.h
+++ b/src/ggml-cuda/vendors/hip.h
@@ -5,8 +5,6 @@
  #include <hipblas/hipblas.h>
  #include <hip/hip_fp16.h>
  #include <hip/hip_bfloat16.h>
-// for rocblas_initialize()
-#include "rocblas/rocblas.h"
  
  #define CUBLAS_GEMM_DEFAULT HIPBLAS_GEMM_DEFAULT
  #define CUBLAS_GEMM_DEFAULT_TENSOR_OP HIPBLAS_GEMM_DEFAULT
@@ -251,17 +249,3 @@ static __device__ __forceinline__ unsigned int __vcmpne4(unsigned int a, unsigne
      }
      return c;
  }
-
-#if HIP_VERSION < 50600000
-// __shfl_xor() for half2 was added in ROCm 5.6
-static __device__ __forceinline__ half2 __shfl_xor(half2 var, int laneMask, int width) {
-    typedef union half2_b32 {
-        half2 val;
-        int   b32;
-    } half2_b32_t;
-    half2_b32_t tmp;
-    tmp.val = var;
-    tmp.b32 = __shfl_xor(tmp.b32, laneMask, width);
-    return tmp.val;
-}
-#endif // HIP_VERSION < 50600000
diff --git a/src/ggml-hip/CMakeLists.txt b/src/ggml-hip/CMakeLists.txt

index 852de973460252415da3db528b56b866e19cd720..d327b90cceb25ab424918a58f52cc2252f758164 100644 (file)
--- a/src/ggml-hip/CMakeLists.txt
+++ b/src/ggml-hip/CMakeLists.txt
@@ -46,8 +46,8 @@ if (GGML_HIP_ROCWMMA_FATTN)
      endif()
  endif()
  
-if (${hip_VERSION} VERSION_LESS 5.5)
-    message(FATAL_ERROR "At least ROCM/HIP V5.5 is required")
+if (${hip_VERSION} VERSION_LESS 6.1)
+    message(FATAL_ERROR "At least ROCM/HIP V6.1 is required")
  endif()
  
  message(STATUS "HIP and hipBLAS found")
author	uvos <redacted>
	Wed, 13 Aug 2025 18:44:30 +0000 (20:44 +0200)
committer	Georgi Gerganov <redacted>
	Thu, 14 Aug 2025 11:17:28 +0000 (14:17 +0300)
src/ggml-cuda/common.cuh		patch \| blob \| history
src/ggml-cuda/ggml-cuda.cu		patch \| blob \| history
src/ggml-cuda/vendors/hip.h		patch \| blob \| history
src/ggml-hip/CMakeLists.txt		patch \| blob \| history