(CCID STREQUAL "Clang" AND CCVER VERSION_GREATER_EQUAL 3.8.0) OR
(CCID STREQUAL "AppleClang" AND CCVER VERSION_GREATER_EQUAL 7.3.0)
)
- set(C_FLAGS ${C_FLAGS} -Wdouble-promotion)
+ list(APPEND C_FLAGS -Wdouble-promotion)
endif()
elseif (CCID STREQUAL "GNU")
set(C_FLAGS -Wdouble-promotion)
set(CXX_FLAGS -Wno-array-bounds)
if (CCVER VERSION_GREATER_EQUAL 7.1.0)
- set(CXX_FLAGS ${CXX_FLAGS} -Wno-format-truncation)
+ list(APPEND CXX_FLAGS -Wno-format-truncation)
endif()
if (CCVER VERSION_GREATER_EQUAL 8.1.0)
- set(CXX_FLAGS ${CXX_FLAGS} -Wextra-semi)
+ list(APPEND CXX_FLAGS -Wextra-semi)
endif()
elseif (CCID MATCHES "Intel")
# enable max optimization level when using Intel compiler
endif()
endif()
+set(CUDA_CXX_FLAGS "")
+
if (LLAMA_CUBLAS)
set(CUDA_FLAGS ${CXX_FLAGS} -use_fast_math)
if (NOT MSVC)
- set(CUDA_FLAGS ${CUDA_FLAGS} -Wno-pedantic)
+ list(APPEND CUDA_FLAGS -Wno-pedantic)
endif()
if (LLAMA_ALL_WARNINGS AND NOT MSVC)
set(NVCC_CMD ${CMAKE_CUDA_COMPILER} .c)
if (NOT CMAKE_CUDA_HOST_COMPILER STREQUAL "")
- set(NVCC_CMD ${NVCC_CMD} -ccbin ${CMAKE_CUDA_HOST_COMPILER})
+ list(APPEND NVCC_CMD -ccbin ${CMAKE_CUDA_HOST_COMPILER})
endif()
execute_process(
message("-- CUDA host compiler is ${CUDA_CCID} ${CUDA_CCVER}")
get_flags(${CUDA_CCID} ${CUDA_CCVER})
- list(JOIN GF_CXX_FLAGS " " CUDA_CXX_FLAGS) # pass host compiler flags as a single argument
- if (NOT CUDA_CXX_FLAGS STREQUAL "")
- set(CUDA_FLAGS ${CUDA_FLAGS} -Xcompiler ${CUDA_CXX_FLAGS})
- endif()
+ list(APPEND CUDA_CXX_FLAGS ${GF_CXX_FLAGS}) # This is passed to -Xcompiler later
endif()
-
- add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:${CUDA_FLAGS}>")
endif()
if (WIN32)
endif()
endif()
-function(add_compile_option_cpp ARG)
- # Adds a compile option to C/C++ only, but not for Cuda.
- # Use, e.g., for CPU-architecture flags.
- add_compile_options($<$<COMPILE_LANGUAGE:CXX>:${ARG}>)
- add_compile_options($<$<COMPILE_LANGUAGE:C>:${ARG}>)
-endfunction()
+set(ARCH_FLAGS "") # CPU-architecture flags are collected here, then applied to C/C++ and forwarded to the CUDA host compiler further down
if ((${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") OR (${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64") OR ("${CMAKE_GENERATOR_PLATFORM_LWR}" MATCHES "arm64"))
message(STATUS "ARM detected")
else()
check_cxx_compiler_flag(-mfp16-format=ieee COMPILER_SUPPORTS_FP16_FORMAT_I3E)
if (NOT "${COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
- add_compile_options(-mfp16-format=ieee)
+ list(APPEND ARCH_FLAGS -mfp16-format=ieee)
endif()
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6")
# Raspberry Pi 1, Zero
- add_compile_options(-mfpu=neon-fp-armv8 -mno-unaligned-access)
+ list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access)
endif()
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7")
# Raspberry Pi 2
- add_compile_options(-mfpu=neon-fp-armv8 -mno-unaligned-access -funsafe-math-optimizations)
+ list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access -funsafe-math-optimizations)
endif()
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv8")
# Raspberry Pi 3, 4, Zero 2 (32-bit)
- add_compile_options(-mno-unaligned-access)
+ list(APPEND ARCH_FLAGS -mno-unaligned-access)
endif()
endif()
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$" OR "${CMAKE_GENERATOR_PLATFORM_LWR}" MATCHES "^(x86_64|i686|amd64|x64)$" )
include(cmake/FindSIMD.cmake)
endif ()
if (LLAMA_AVX512)
- add_compile_option_cpp(/arch:AVX512)
+ list(APPEND ARCH_FLAGS /arch:AVX512)
# MSVC has no compile-time flags enabling specific
# AVX512 extensions, nor does it define the
# macros corresponding to the extensions.
add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VNNI__>)
endif()
elseif (LLAMA_AVX2)
- add_compile_option_cpp(/arch:AVX2)
+ list(APPEND ARCH_FLAGS /arch:AVX2)
elseif (LLAMA_AVX)
- add_compile_option_cpp(/arch:AVX)
+ list(APPEND ARCH_FLAGS /arch:AVX)
endif()
else()
if (LLAMA_NATIVE)
- add_compile_option_cpp(-march=native)
+ list(APPEND ARCH_FLAGS -march=native)
endif()
if (LLAMA_F16C)
- add_compile_option_cpp(-mf16c)
+ list(APPEND ARCH_FLAGS -mf16c)
endif()
if (LLAMA_FMA)
- add_compile_option_cpp(-mfma)
+ list(APPEND ARCH_FLAGS -mfma)
endif()
if (LLAMA_AVX)
- add_compile_option_cpp(-mavx)
+ list(APPEND ARCH_FLAGS -mavx)
endif()
if (LLAMA_AVX2)
- add_compile_option_cpp(-mavx2)
+ list(APPEND ARCH_FLAGS -mavx2)
endif()
if (LLAMA_AVX512)
- add_compile_option_cpp(-mavx512f)
- add_compile_option_cpp(-mavx512bw)
+ list(APPEND ARCH_FLAGS -mavx512f)
+ list(APPEND ARCH_FLAGS -mavx512bw)
endif()
if (LLAMA_AVX512_VBMI)
- add_compile_option_cpp(-mavx512vbmi)
+ list(APPEND ARCH_FLAGS -mavx512vbmi)
endif()
if (LLAMA_AVX512_VNNI)
- add_compile_option_cpp(-mavx512vnni)
+ list(APPEND ARCH_FLAGS -mavx512vnni)
endif()
endif()
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
message(STATUS "PowerPC detected")
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64le")
- add_compile_options(-mcpu=powerpc64le)
+ list(APPEND ARCH_FLAGS -mcpu=powerpc64le)
else()
- add_compile_options(-mcpu=native -mtune=native)
+ list(APPEND ARCH_FLAGS -mcpu=native -mtune=native)
#TODO: Add targets for Power8/Power9 (Altivec/VSX) and Power10(MMA) and query for big endian systems (ppc64/le/be)
endif()
else()
message(STATUS "Unknown architecture")
endif()
+add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:${ARCH_FLAGS}>") # quoted so the whole ;-list stays inside one generator expression; C++ sources only
+add_compile_options("$<$<COMPILE_LANGUAGE:C>:${ARCH_FLAGS}>") # same flags for C sources; CUDA sources are not matched by either expression
+
+if (LLAMA_CUBLAS) # finalize the CUDA flags here, after the architecture checks above have filled ARCH_FLAGS
+ list(APPEND CUDA_CXX_FLAGS ${ARCH_FLAGS}) # host-compiler flags = flags gathered earlier in CUDA_CXX_FLAGS + the architecture flags
+ list(JOIN CUDA_CXX_FLAGS " " CUDA_CXX_FLAGS_JOINED) # pass host compiler flags as a single argument
+ if (NOT CUDA_CXX_FLAGS_JOINED STREQUAL "") # only emit -Xcompiler when there is something to forward
+ list(APPEND CUDA_FLAGS -Xcompiler ${CUDA_CXX_FLAGS_JOINED})
+ endif()
+ add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:${CUDA_FLAGS}>") # restricted to CUDA sources by the generator expression
+endif()
+
if (MINGW)
# Target Windows 8 for PrefetchVirtualMemory
add_compile_definitions(_WIN32_WINNT=${LLAMA_WIN_VER})