From: Aman Gupta Date: Thu, 25 Dec 2025 14:12:11 +0000 (+0800) Subject: ggml-cuda: fix blackwell native builds (llama/18361) X-Git-Tag: v0.9.5~30 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=a5506cd13f278b2d20afc973e6d332a86a5df83c;p=pkg%2Fggml%2Fsources%2Fggml ggml-cuda: fix blackwell native builds (llama/18361) * ggml-cuda: fix blackwell native builds Replace 12x in native architectures by 12xa * replace for GGML_NATIVE=OFF too * only replace for native * remove 120f-virtual for default compilation --------- Co-authored-by: Aman Gupta --- diff --git a/src/ggml-cuda/CMakeLists.txt b/src/ggml-cuda/CMakeLists.txt index f1412e8b..f3c3ebff 100644 --- a/src/ggml-cuda/CMakeLists.txt +++ b/src/ggml-cuda/CMakeLists.txt @@ -35,16 +35,33 @@ if (CUDAToolkit_FOUND) if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL "11.8") list(APPEND CMAKE_CUDA_ARCHITECTURES 89-real) endif() - - if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL "12.8") - list(APPEND CMAKE_CUDA_ARCHITECTURES 120f-virtual) - endif() endif() endif() message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}") enable_language(CUDA) + # Replace any 12x-real architectures with 12x{a}-real. 
+    # FP4 PTX instructions are not available in plain 12x; they need the
+    # architecture-specific 12xa variant.
+    #
+    # Entries may be bare ("120") or carry a "-real" suffix ("120-real"), so
+    # the match has to accept both; the REPLACE below strips any suffix to get
+    # the base number.
+    if (GGML_NATIVE)
+        set(PROCESSED_ARCHITECTURES "")
+        foreach(ARCH IN LISTS CMAKE_CUDA_ARCHITECTURES_NATIVE)
+            if(ARCH MATCHES "^12[0-9](-real)?$")
+                string(REGEX REPLACE "^(12[0-9]).*$" "\\1" BASE_ARCH "${ARCH}")
+                message(STATUS "Replacing ${ARCH} with ${BASE_ARCH}a-real")
+                list(APPEND PROCESSED_ARCHITECTURES "${BASE_ARCH}a-real")
+            else()
+                list(APPEND PROCESSED_ARCHITECTURES "${ARCH}")
+            endif()
+        endforeach()
+        # Only override when something was actually detected; otherwise an
+        # empty native list would wipe CMAKE_CUDA_ARCHITECTURES entirely.
+        if (NOT "${PROCESSED_ARCHITECTURES}" STREQUAL "")
+            set(CMAKE_CUDA_ARCHITECTURES ${PROCESSED_ARCHITECTURES})
+        endif()
+    else()
+        # Explicit architecture lists are not rewritten; plain 12x entries
+        # (with or without a -real/-virtual suffix) are rejected instead.
+        foreach(ARCH IN LISTS CMAKE_CUDA_ARCHITECTURES)
+            if(ARCH MATCHES "^12[0-9](-real|-virtual)?$")
+                string(REGEX REPLACE "^(12[0-9]).*$" "\\1" BASE_ARCH "${ARCH}")
+                message(FATAL_ERROR "Compute capability ${ARCH} used, use ${BASE_ARCH}a or ${BASE_ARCH}f for Blackwell specific optimizations")
+            endif()
+        endforeach()
+    endif()
+
     file(GLOB GGML_HEADERS_CUDA "*.cuh")
     list(APPEND GGML_HEADERS_CUDA "../../include/ggml-cuda.h")