if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL "11.8")
list(APPEND CMAKE_CUDA_ARCHITECTURES 89-real)
endif()
-
- if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL "12.8")
- list(APPEND CMAKE_CUDA_ARCHITECTURES 120f-virtual)
- endif()
endif()
endif()
# Log the CUDA architecture list as it stands at this point in the script.
message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")
# Turn on CUDA as a build language for this project.
enable_language(CUDA)
+ # Blackwell (compute capability 12x) handling. FP4 PTX instructions are not available
+ # when targeting plain 12x; they need an arch-specific target such as 12xa or 12xf.
+ #   - GGML_NATIVE:  every plain 12x entry (bare, or carrying a -real / -virtual suffix)
+ #                   is rewritten to 12xa-real.
+ #   - otherwise:    a plain 12x entry is a hard error, so the user has to pick 12xa or
+ #                   12xf explicitly.
+ if (GGML_NATIVE)
+     # Prefer the architectures CMake detected on the host. That list can be empty
+     # (e.g. no GPU visible at configure time, or a CMake version that does not provide
+     # it); fall back to the configured list so CMAKE_CUDA_ARCHITECTURES is never
+     # overwritten with an empty value.
+     if (NOT "${CMAKE_CUDA_ARCHITECTURES_NATIVE}" STREQUAL "")
+         set(ARCH_LIST ${CMAKE_CUDA_ARCHITECTURES_NATIVE})
+     else()
+         set(ARCH_LIST ${CMAKE_CUDA_ARCHITECTURES})
+     endif()
+
+     set(PROCESSED_ARCHITECTURES "")
+     foreach(ARCH IN LISTS ARCH_LIST)
+         # Entries may look like "120", "120-real" or "120-virtual"; "120a" / "120f"
+         # entries do not match and are passed through unchanged.
+         if (ARCH MATCHES "^12[0-9](-real|-virtual)?$")
+             string(REGEX REPLACE "^(12[0-9]).*$" "\\1" BASE_ARCH "${ARCH}")
+             message(STATUS "Replacing ${ARCH} with ${BASE_ARCH}a-real")
+             list(APPEND PROCESSED_ARCHITECTURES "${BASE_ARCH}a-real")
+         else()
+             list(APPEND PROCESSED_ARCHITECTURES "${ARCH}")
+         endif()
+     endforeach()
+     set(CMAKE_CUDA_ARCHITECTURES ${PROCESSED_ARCHITECTURES})
+ else()
+     foreach(ARCH IN LISTS CMAKE_CUDA_ARCHITECTURES)
+         if (ARCH MATCHES "^12[0-9](-real|-virtual)?$")
+             # Strip any suffix so the suggestion reads "120a", not "120-reala".
+             string(REGEX REPLACE "^(12[0-9]).*$" "\\1" BASE_ARCH "${ARCH}")
+             message(FATAL_ERROR "Compute capability ${ARCH} used, use ${BASE_ARCH}a or ${BASE_ARCH}f for Blackwell specific optimizations")
+         endif()
+     endforeach()
+ endif()
+
# Gather the backend's header list: every file matching *.cuh found by the glob,
# plus the ggml-cuda.h header located under ../../include.
file(GLOB GGML_HEADERS_CUDA "*.cuh")
list(APPEND GGML_HEADERS_CUDA "../../include/ggml-cuda.h")