* musa: fix failures in test-backend-ops for mul_mat_id op
Signed-off-by: Xiaodong Ye <redacted>
* Address review comments
Signed-off-by: Xiaodong Ye <redacted>
---------
Signed-off-by: Xiaodong Ye <redacted>
}
static bool ampere_mma_available(const int cc) {
- return cc < GGML_CUDA_CC_OFFSET_AMD && ggml_cuda_highest_compiled_arch(cc) >= GGML_CUDA_CC_AMPERE;
+ return GGML_CUDA_CC_IS_NVIDIA(cc) && ggml_cuda_highest_compiled_arch(cc) >= GGML_CUDA_CC_AMPERE;
}
static bool cp_async_available(const int cc) {
- return cc < GGML_CUDA_CC_OFFSET_AMD && ggml_cuda_highest_compiled_arch(cc) >= GGML_CUDA_CC_AMPERE;
+ return GGML_CUDA_CC_IS_NVIDIA(cc) && ggml_cuda_highest_compiled_arch(cc) >= GGML_CUDA_CC_AMPERE;
}
static constexpr __device__ int ggml_cuda_get_physical_warp_size() {