git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
cuda : fix HIP and MUSA BF16 (#0)
author Georgi Gerganov <redacted>
Mon, 7 Apr 2025 10:18:07 +0000 (13:18 +0300)
committer Georgi Gerganov <redacted>
Mon, 7 Apr 2025 15:44:17 +0000 (18:44 +0300)
ggml-ci

ggml/src/ggml-cuda/convert.cu
ggml/src/ggml-cuda/vendors/hip.h
ggml/src/ggml-cuda/vendors/musa.h

diff --git a/ggml/src/ggml-cuda/convert.cu b/ggml/src/ggml-cuda/convert.cu
index 2681c42ebecb99655234a2b3c0ce8b69899ab43d..a224ec0e12ddb1bdfa89de4da382f677c8f87998 100644 (file)
@@ -579,13 +579,7 @@ static __global__ void convert_unary(const void * __restrict__ vx, dst_t * __res
 
     const src_t * x = (const src_t *) vx;
 
-    if constexpr (std::is_same_v<src_t, nv_bfloat16>) {
-        y[i] = __bfloat162float(x[i]);
-    } else if constexpr (std::is_same_v<dst_t, nv_bfloat16> && std::is_same_v<src_t, half>) {
-        y[i] = (float)x[i];
-    } else {
-        y[i] = x[i];
-    }
+    y[i] = float(x[i]);
 }
 
 template <typename src_t, typename dst_t>
diff --git a/ggml/src/ggml-cuda/vendors/hip.h b/ggml/src/ggml-cuda/vendors/hip.h
index 3983ce5b423c0804e620125dcfa8ee00948c4fec..420b41b8d652d6b6c15b780fd28e348a63643bf0 100644 (file)
@@ -20,6 +20,7 @@
 #define CUBLAS_STATUS_SUCCESS HIPBLAS_STATUS_SUCCESS
 #define CUBLAS_TF32_TENSOR_OP_MATH 0
 #define CUDA_R_16F  HIPBLAS_R_16F
+#define CUDA_R_16BF HIPBLAS_R_16B
 #define CUDA_R_32F  HIPBLAS_R_32F
 #define CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED hipDeviceAttributeVirtualMemoryManagementSupported
 #define CU_MEM_ALLOC_GRANULARITY_RECOMMENDED hipMemAllocationGranularityRecommended
diff --git a/ggml/src/ggml-cuda/vendors/musa.h b/ggml/src/ggml-cuda/vendors/musa.h
index f2d55796e7874954b458fc83e6344499c266d555..937779a90af6efbc3bbb8b8c487b4909a53f9478 100644 (file)
@@ -15,6 +15,7 @@
 #define CUBLAS_STATUS_SUCCESS MUBLAS_STATUS_SUCCESS
 #define CUBLAS_TF32_TENSOR_OP_MATH MUBLAS_MATH_MODE_DEFAULT
 #define CUDA_R_16F  MUSA_R_16F
+#define CUDA_R_16BF MUSA_R_16BF
 #define CUDA_R_32F  MUSA_R_32F
 #define cublasComputeType_t cudaDataType_t
 #define cublasCreate mublasCreate