Add a warning for special devices (#15563)
author    Yoshi_likes_e4 <redacted>
          Tue, 26 Aug 2025 06:15:33 +0000 (13:15 +0700)
committer GitHub <redacted>
          Tue, 26 Aug 2025 06:15:33 +0000 (08:15 +0200)
* Add warning

* Print the device names

* Add newlines

* Apply suggestions from code review

Co-authored-by: Johannes Gäßler <redacted>
* Fix vector names

---------

Co-authored-by: Johannes Gäßler <redacted>
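
The change keys off the device name string reported by the CUDA runtime:
the GeForce MX450, MX550, and the GTX 16xx series are Turing parts that
ship without tensor cores, so the MMA kernels bring no benefit there. A
minimal sketch of that matching rule as a standalone predicate (the helper
name is_turing_without_mma is hypothetical, not part of the commit):

    #include <string>

    // True for the known Turing devices that lack tensor cores. The GTX 16xx
    // check matches on the 21-character prefix "NVIDIA GeForce GTX 16";
    // std::string::substr clamps to the string length, so shorter names are safe.
    static bool is_turing_without_mma(const std::string & device_name) {
        return device_name == "NVIDIA GeForce MX450"
            || device_name == "NVIDIA GeForce MX550"
            || device_name.substr(0, 21) == "NVIDIA GeForce GTX 16";
    }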
ggml/src/ggml-cuda/ggml-cuda.cu

index aa45ab39ed89efd093e49428daaa4bb5a87e46e5..449488341557fd5d9279aa6643ed24d15ddeceec 100644 (file)
@@ -204,6 +204,8 @@ static ggml_cuda_device_info ggml_cuda_init() {
     GGML_LOG_INFO("%s: GGML_CUDA_FORCE_CUBLAS: no\n", __func__);
 #endif // GGML_CUDA_FORCE_CUBLAS
     GGML_LOG_INFO("%s: found %d " GGML_CUDA_NAME " devices:\n", __func__, info.device_count);
+
+    std::vector<std::pair<int, std::string>> turing_devices_without_mma;
     for (int id = 0; id < info.device_count; ++id) {
         int device_vmm = 0;
 
@@ -261,7 +263,25 @@ static ggml_cuda_device_info ggml_cuda_init() {
         info.devices[id].cc = 100*prop.major + 10*prop.minor;
         GGML_LOG_INFO("  Device %d: %s, compute capability %d.%d, VMM: %s\n",
                         id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no");
-#endif // defined(GGML_USE_HIP)
+        std::string device_name(prop.name);
+        if (device_name == "NVIDIA GeForce MX450") {
+            turing_devices_without_mma.push_back({ id, device_name });
+        } else if (device_name == "NVIDIA GeForce MX550") {
+            turing_devices_without_mma.push_back({ id, device_name });
+        } else if (device_name.substr(0, 21) == "NVIDIA GeForce GTX 16") {
+            turing_devices_without_mma.push_back({ id, device_name });
+        }
+#endif  // defined(GGML_USE_HIP)
+    }
+
+    if (ggml_cuda_highest_compiled_arch(GGML_CUDA_CC_TURING) >= GGML_CUDA_CC_TURING && !turing_devices_without_mma.empty()) {
+        GGML_LOG_INFO("The following devices will have suboptimal performance due to a lack of tensor cores:\n");
+        for (size_t device_pos = 0; device_pos < turing_devices_without_mma.size(); device_pos++) {
+            GGML_LOG_INFO(
+                "  Device %d: %s\n", turing_devices_without_mma[device_pos].first, turing_devices_without_mma[device_pos].second.c_str());
+        }
+        GGML_LOG_INFO(
+            "Consider compiling with CMAKE_CUDA_ARCHITECTURES=61-virtual;80-virtual and DGGML_CUDA_FORCE_MMQ to force the use of the Pascal code for Turing.\n");
     }
 
     for (int id = 0; id < info.device_count; ++id) {
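
Note the gating condition in the second hunk: the warning only fires when the
binary was compiled with Turing-level kernels in the first place
(ggml_cuda_highest_compiled_arch(GGML_CUDA_CC_TURING) >= GGML_CUDA_CC_TURING);
a build targeting only older architectures would never select the MMA path on
these devices anyway. A self-contained sketch of the collect-then-warn
pattern, with the ggml logging macro and the compiled-arch check stubbed out
(the stub value 750 assumes Turing's compute capability 7.5 encoded as
100*major + 10*minor, per the diff above):

    #include <cstdio>
    #include <string>
    #include <utility>
    #include <vector>

    // Stand-ins for the ggml definitions referenced in the diff; values are
    // assumed for illustration, the real ones live in ggml's CUDA headers.
    static const int cc_turing = 750;                       // 100*7 + 10*5
    static int highest_compiled_arch_stub() { return 750; } // pretend Turing kernels were built

    int main() {
        // (device id, device name) pairs as filled in by the detection loop.
        std::vector<std::pair<int, std::string>> turing_devices_without_mma = {
            {0, "NVIDIA GeForce GTX 1660"},
        };

        // Warn only if Turing kernels were compiled in AND such a device is present.
        if (highest_compiled_arch_stub() >= cc_turing && !turing_devices_without_mma.empty()) {
            std::printf("The following devices will have suboptimal performance due to a lack of tensor cores:\n");
            for (const auto & [id, name] : turing_devices_without_mma) {
                std::printf("  Device %d: %s\n", id, name.c_str());
            }
        }
        return 0;
    }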