cmake : pass CPU architecture flags to nvcc (#5146)

author Jared Van Bortel <redacted>

Fri, 26 Jan 2024 20:34:06 +0000 (15:34 -0500)

committer GitHub <redacted>

Fri, 26 Jan 2024 20:34:06 +0000 (15:34 -0500)
author Jared Van Bortel <redacted>
Fri, 26 Jan 2024 20:34:06 +0000 (15:34 -0500)
committer GitHub <redacted>
Fri, 26 Jan 2024 20:34:06 +0000 (15:34 -0500)
diff --git a/CMakeLists.txt b/CMakeLists.txt

index af36651297b7677837f0892e02c0e7e9e86d82cc..2b2ae532e49a230099ed095732842f471e556487 100644 (file)
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -466,17 +466,17 @@ function(get_flags CCID CCVER)
              (CCID STREQUAL "Clang"      AND CCVER VERSION_GREATER_EQUAL 3.8.0) OR
              (CCID STREQUAL "AppleClang" AND CCVER VERSION_GREATER_EQUAL 7.3.0)
          )
-            set(C_FLAGS ${C_FLAGS} -Wdouble-promotion)
+            list(APPEND C_FLAGS -Wdouble-promotion)
          endif()
      elseif (CCID STREQUAL "GNU")
          set(C_FLAGS   -Wdouble-promotion)
          set(CXX_FLAGS -Wno-array-bounds)
  
          if (CCVER VERSION_GREATER_EQUAL 7.1.0)
-            set(CXX_FLAGS ${CXX_FLAGS} -Wno-format-truncation)
+            list(APPEND CXX_FLAGS -Wno-format-truncation)
          endif()
          if (CCVER VERSION_GREATER_EQUAL 8.1.0)
-            set(CXX_FLAGS ${CXX_FLAGS} -Wextra-semi)
+            list(APPEND CXX_FLAGS -Wextra-semi)
          endif()
      elseif (CCID MATCHES "Intel")
          # enable max optimization level when using Intel compiler
@@ -510,16 +510,18 @@ if (LLAMA_ALL_WARNINGS)
      endif()
  endif()
  
+set(CUDA_CXX_FLAGS "")
+
  if (LLAMA_CUBLAS)
      set(CUDA_FLAGS ${CXX_FLAGS} -use_fast_math)
      if (NOT MSVC)
-        set(CUDA_FLAGS ${CUDA_FLAGS} -Wno-pedantic)
+        list(APPEND CUDA_FLAGS -Wno-pedantic)
      endif()
  
      if (LLAMA_ALL_WARNINGS AND NOT MSVC)
          set(NVCC_CMD ${CMAKE_CUDA_COMPILER} .c)
          if (NOT CMAKE_CUDA_HOST_COMPILER STREQUAL "")
-            set(NVCC_CMD ${NVCC_CMD} -ccbin ${CMAKE_CUDA_HOST_COMPILER})
+            list(APPEND NVCC_CMD -ccbin ${CMAKE_CUDA_HOST_COMPILER})
          endif()
  
          execute_process(
@@ -547,13 +549,8 @@ if (LLAMA_CUBLAS)
          message("-- CUDA host compiler is ${CUDA_CCID} ${CUDA_CCVER}")
  
          get_flags(${CUDA_CCID} ${CUDA_CCVER})
-        list(JOIN GF_CXX_FLAGS " " CUDA_CXX_FLAGS)  # pass host compiler flags as a single argument
-        if (NOT CUDA_CXX_FLAGS STREQUAL "")
-            set(CUDA_FLAGS ${CUDA_FLAGS} -Xcompiler ${CUDA_CXX_FLAGS})
-        endif()
+        list(APPEND CUDA_CXX_FLAGS ${GF_CXX_FLAGS})  # This is passed to -Xcompiler later
      endif()
-
-    add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:${CUDA_FLAGS}>")
  endif()
  
  if (WIN32)
@@ -618,12 +615,7 @@ if (NOT MSVC)
      endif()
  endif()
  
-function(add_compile_option_cpp ARG)
-    # Adds a compile option to C/C++ only, but not for Cuda.
-    # Use, e.g., for CPU-architecture flags.
-    add_compile_options($<$<COMPILE_LANGUAGE:CXX>:${ARG}>)
-    add_compile_options($<$<COMPILE_LANGUAGE:C>:${ARG}>)
-endfunction()
+set(ARCH_FLAGS "")
  
  if ((${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") OR (${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64") OR ("${CMAKE_GENERATOR_PLATFORM_LWR}" MATCHES "arm64"))
      message(STATUS "ARM detected")
@@ -636,19 +628,19 @@ if ((${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") OR (${CMAKE_SYSTEM_PROCESSOR} MATC
      else()
          check_cxx_compiler_flag(-mfp16-format=ieee COMPILER_SUPPORTS_FP16_FORMAT_I3E)
          if (NOT "${COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
-            add_compile_options(-mfp16-format=ieee)
+            list(APPEND ARCH_FLAGS -mfp16-format=ieee)
          endif()
          if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6")
              # Raspberry Pi 1, Zero
-            add_compile_options(-mfpu=neon-fp-armv8 -mno-unaligned-access)
+            list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access)
          endif()
          if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7")
              # Raspberry Pi 2
-            add_compile_options(-mfpu=neon-fp-armv8 -mno-unaligned-access -funsafe-math-optimizations)
+            list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access -funsafe-math-optimizations)
          endif()
          if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv8")
              # Raspberry Pi 3, 4, Zero 2 (32-bit)
-            add_compile_options(-mno-unaligned-access)
+            list(APPEND ARCH_FLAGS -mno-unaligned-access)
          endif()
      endif()
  elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$" OR "${CMAKE_GENERATOR_PLATFORM_LWR}" MATCHES "^(x86_64|i686|amd64|x64)$" )
@@ -659,7 +651,7 @@ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$" OR "${CMAKE_GE
              include(cmake/FindSIMD.cmake)
          endif ()
          if (LLAMA_AVX512)
-            add_compile_option_cpp(/arch:AVX512)
+            list(APPEND ARCH_FLAGS /arch:AVX512)
              # MSVC has no compile-time flags enabling specific
              # AVX512 extensions, neither it defines the
              # macros corresponding to the extensions.
@@ -673,49 +665,61 @@ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$" OR "${CMAKE_GE
                  add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VNNI__>)
              endif()
          elseif (LLAMA_AVX2)
-            add_compile_option_cpp(/arch:AVX2)
+            list(APPEND ARCH_FLAGS /arch:AVX2)
          elseif (LLAMA_AVX)
-            add_compile_option_cpp(/arch:AVX)
+            list(APPEND ARCH_FLAGS /arch:AVX)
          endif()
      else()
          if (LLAMA_NATIVE)
-            add_compile_option_cpp(-march=native)
+            list(APPEND ARCH_FLAGS -march=native)
          endif()
          if (LLAMA_F16C)
-            add_compile_option_cpp(-mf16c)
+            list(APPEND ARCH_FLAGS -mf16c)
          endif()
          if (LLAMA_FMA)
-            add_compile_option_cpp(-mfma)
+            list(APPEND ARCH_FLAGS -mfma)
          endif()
          if (LLAMA_AVX)
-            add_compile_option_cpp(-mavx)
+            list(APPEND ARCH_FLAGS -mavx)
          endif()
          if (LLAMA_AVX2)
-            add_compile_option_cpp(-mavx2)
+            list(APPEND ARCH_FLAGS -mavx2)
          endif()
          if (LLAMA_AVX512)
-            add_compile_option_cpp(-mavx512f)
-            add_compile_option_cpp(-mavx512bw)
+            list(APPEND ARCH_FLAGS -mavx512f)
+            list(APPEND ARCH_FLAGS -mavx512bw)
          endif()
          if (LLAMA_AVX512_VBMI)
-            add_compile_option_cpp(-mavx512vbmi)
+            list(APPEND ARCH_FLAGS -mavx512vbmi)
          endif()
          if (LLAMA_AVX512_VNNI)
-            add_compile_option_cpp(-mavx512vnni)
+            list(APPEND ARCH_FLAGS -mavx512vnni)
          endif()
      endif()
  elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
      message(STATUS "PowerPC detected")
      if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64le")
-        add_compile_options(-mcpu=powerpc64le)
+        list(APPEND ARCH_FLAGS -mcpu=powerpc64le)
      else()
-        add_compile_options(-mcpu=native -mtune=native)
+        list(APPEND ARCH_FLAGS -mcpu=native -mtune=native)
          #TODO: Add  targets for Power8/Power9 (Altivec/VSX) and Power10(MMA) and query for big endian systems (ppc64/le/be)
      endif()
  else()
      message(STATUS "Unknown architecture")
  endif()
  
+add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:${ARCH_FLAGS}>")
+add_compile_options("$<$<COMPILE_LANGUAGE:C>:${ARCH_FLAGS}>")
+
+if (LLAMA_CUBLAS)
+    list(APPEND CUDA_CXX_FLAGS ${ARCH_FLAGS})
+    list(JOIN CUDA_CXX_FLAGS " " CUDA_CXX_FLAGS_JOINED)  # pass host compiler flags as a single argument
+    if (NOT CUDA_CXX_FLAGS_JOINED STREQUAL "")
+        list(APPEND CUDA_FLAGS -Xcompiler ${CUDA_CXX_FLAGS_JOINED})
+    endif()
+    add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:${CUDA_FLAGS}>")
+endif()
+
  if (MINGW)
      # Target Windows 8 for PrefetchVirtualMemory
      add_compile_definitions(_WIN32_WINNT=${LLAMA_WIN_VER})
author	Jared Van Bortel <redacted>
	Fri, 26 Jan 2024 20:34:06 +0000 (15:34 -0500)
committer	GitHub <redacted>
	Fri, 26 Jan 2024 20:34:06 +0000 (15:34 -0500)