ggml-cuda: fix blackwell native builds (llama/18361)

author Aman Gupta <redacted>

Thu, 25 Dec 2025 14:12:11 +0000 (22:12 +0800)

committer Georgi Gerganov <redacted>

Wed, 31 Dec 2025 10:39:43 +0000 (12:39 +0200)
author Aman Gupta <redacted>
Thu, 25 Dec 2025 14:12:11 +0000 (22:12 +0800)
committer Georgi Gerganov <redacted>
Wed, 31 Dec 2025 10:39:43 +0000 (12:39 +0200)
diff --git a/src/ggml-cuda/CMakeLists.txt b/src/ggml-cuda/CMakeLists.txt

index f1412e8b196052a71423988f402ee4bdf928d2aa..f3c3ebff7ea3cb7853456a90ed5cbab8d6fae65c 100644 (file)
--- a/src/ggml-cuda/CMakeLists.txt
+++ b/src/ggml-cuda/CMakeLists.txt
@@ -35,16 +35,33 @@ if (CUDAToolkit_FOUND)
              if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL "11.8")
                  list(APPEND CMAKE_CUDA_ARCHITECTURES 89-real)
              endif()
-
-            if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL "12.8")
-                list(APPEND CMAKE_CUDA_ARCHITECTURES 120f-virtual)
-            endif()
          endif()
      endif()
      message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")
  
      enable_language(CUDA)
  
+    # Blackwell (12x): FP4 PTX instructions are only available in the arch-specific 12xa / family-specific 12xf targets, not in plain 12x.
+    if (GGML_NATIVE)
+        # Rewrite every detected 12x entry, with or without a -real/-virtual suffix (e.g. 120-real), to 12xa-real.
+        set(PROCESSED_ARCHITECTURES "")
+        foreach(ARCH ${CMAKE_CUDA_ARCHITECTURES_NATIVE})
+            if(ARCH MATCHES "^(12[0-9])(-real|-virtual)?$")
+                message(STATUS "Replacing ${ARCH} with ${CMAKE_MATCH_1}a-real")
+                list(APPEND PROCESSED_ARCHITECTURES "${CMAKE_MATCH_1}a-real")
+            else()
+                list(APPEND PROCESSED_ARCHITECTURES ${ARCH})
+            endif()
+        endforeach()
+        set(CMAKE_CUDA_ARCHITECTURES ${PROCESSED_ARCHITECTURES})
+    else()
+        foreach(ARCH ${CMAKE_CUDA_ARCHITECTURES})
+            if(ARCH MATCHES "^(12[0-9])(-real|-virtual)?$")
+                message(FATAL_ERROR "Compute capability ${ARCH} used, use ${CMAKE_MATCH_1}a or ${CMAKE_MATCH_1}f for Blackwell specific optimizations")
+            endif()
+        endforeach()
+    endif()
+
      file(GLOB   GGML_HEADERS_CUDA "*.cuh")
      list(APPEND GGML_HEADERS_CUDA "../../include/ggml-cuda.h")
author	Aman Gupta <redacted>
	Thu, 25 Dec 2025 14:12:11 +0000 (22:12 +0800)
committer	Georgi Gerganov <redacted>
	Wed, 31 Dec 2025 10:39:43 +0000 (12:39 +0200)