#include <algorithm>
#include <array>
#include <atomic>
+#include <charconv>
#include <cinttypes>
#include <cstddef>
#include <cstdint>
#ifdef __HIP_PLATFORM_AMD__
// Workaround for a rocBLAS bug when using multiple graphics cards:
// https://github.com/ROCmSoftwarePlatform/rocBLAS/issues/1346
- rocblas_initialize();
- CUDA_CHECK(cudaDeviceSynchronize());
+ {
+     // Leading integer of the rocBLAS version string. Every failure path yields 0,
+     // which is below the threshold checked further down, so the workaround stays
+     // enabled whenever the version cannot be determined.
+     const int rocblas_major = []() -> int {
+         size_t buffer_size = 0;
+         if (rocblas_get_version_string_size(&buffer_size) != rocblas_status_success) {
+             return 0;
+         }
+         std::string buffer(buffer_size, '\0');
+         if (rocblas_get_version_string(buffer.data(), buffer.size()) != rocblas_status_success) {
+             return 0;
+         }
+         // Only the characters before the first NUL are handed to the parser.
+         const char * first = buffer.c_str();
+         const char * last  = first + ::strlen(first);
+         int leading_number = 0;
+         const auto outcome = std::from_chars(first, last, leading_number);
+         return outcome.ec == std::errc() ? leading_number : 0;
+     }();
+     // NOTE(review): the "< 4" cut-off presumably reflects the release in which the
+     // linked rocBLAS issue was fixed - confirm against the upstream issue.
+     if (rocblas_major < 4) {
+         GGML_LOG_DEBUG(GGML_CUDA_NAME " calling rocblas_initialize as a workaround for a rocBLAS bug\n");
+         rocblas_initialize();
+         CUDA_CHECK(cudaDeviceSynchronize());
+     }
+ }
#endif
ggml_cuda_device_info info = {};