git.djapps.eu Git - pkg/ggml/sources/whisper.cpp/commitdiff
HIP: Only call rocblas_initialize on rocblas versions with the multiple instantiation...
author: Nikita Sarychev <redacted>
Tue, 28 Jan 2025 15:42:20 +0000 (07:42 -0800)
committer: Georgi Gerganov <redacted>
Mon, 3 Feb 2025 20:00:57 +0000 (22:00 +0200)
This disables the workaround on rocBLAS versions in which the bug is fixed (>= 4.0.0), eliminating the runtime cost and unnecessary VRAM allocation of loading all Tensile objects.

ggml/src/ggml-cuda/ggml-cuda.cu

index 402f37e85d4a5d37108753ee0944ef39e6b2fc73..de3f9c2ca1ed5bc4b272d979f7a1f2354de4c118 100644 (file)
@@ -42,6 +42,7 @@
 #include <algorithm>
 #include <array>
 #include <atomic>
+#include <charconv>
 #include <cinttypes>
 #include <cstddef>
 #include <cstdint>
@@ -172,8 +173,25 @@ static ggml_cuda_device_info ggml_cuda_init() {
 #ifdef __HIP_PLATFORM_AMD__
     // Workaround for a rocBLAS bug when using multiple graphics cards:
     // https://github.com/ROCmSoftwarePlatform/rocBLAS/issues/1346
-    rocblas_initialize();
-    CUDA_CHECK(cudaDeviceSynchronize());
+    {
+        int major_version = 0;
+        size_t version_length = 0;
+        if (rocblas_get_version_string_size(&version_length) == rocblas_status_success) {
+            std::string version(version_length, '\0');
+            if (rocblas_get_version_string(version.data(), version.size()) == rocblas_status_success) {
+                version.resize(::strlen(version.c_str()));
+                int parsed_value = 0;
+                if (std::from_chars(version.c_str(), version.c_str() + version.length(), parsed_value).ec == std::errc()) {
+                    major_version = parsed_value;
+                }
+            }
+        }
+        if (major_version < 4) {
+            GGML_LOG_DEBUG(GGML_CUDA_NAME " calling rocblas_initialize as a workaround for a rocBLAS bug\n");
+            rocblas_initialize();
+            CUDA_CHECK(cudaDeviceSynchronize());
+        }
+    }
 #endif
 
     ggml_cuda_device_info info = {};