endif()
endif()
+# get_flags(<CCID> <CCVER>)
+#
+# Pick the compiler-specific warning options for one compiler.
+#   CCID  - compiler id; "GNU" and ids containing "Clang" are recognised,
+#           any other id selects no extra options
+#   CCVER - compiler version, compared with VERSION_GREATER_EQUAL
+#
+# Results are stored in the caller's scope as GF_C_FLAGS and GF_CXX_FLAGS.
+function(get_flags CCID CCVER)
+    set(C_FLAGS "")
+    set(CXX_FLAGS "")
+
+    if (CCID STREQUAL "GNU")
+        list(APPEND C_FLAGS -Wdouble-promotion)
+        list(APPEND CXX_FLAGS -Wno-array-bounds)
+
+        # options that only newer gcc releases understand
+        if (CCVER VERSION_GREATER_EQUAL 7.1.0)
+            list(APPEND CXX_FLAGS -Wno-format-truncation)
+        endif()
+        if (CCVER VERSION_GREATER_EQUAL 8.1.0)
+            list(APPEND CXX_FLAGS -Wextra-semi)
+        endif()
+    elseif (CCID MATCHES "Clang")
+        # shared by C and C++ for every clang flavour
+        set(unreachable_warnings -Wunreachable-code-break -Wunreachable-code-return)
+        list(APPEND C_FLAGS ${unreachable_warnings})
+        list(APPEND CXX_FLAGS ${unreachable_warnings} -Wmissing-prototypes -Wextra-semi)
+
+        # -Wdouble-promotion is gated on a per-vendor minimum version
+        if (CCID STREQUAL "Clang")
+            if (CCVER VERSION_GREATER_EQUAL 3.8.0)
+                list(APPEND C_FLAGS -Wdouble-promotion)
+            endif()
+        elseif (CCID STREQUAL "AppleClang")
+            if (CCVER VERSION_GREATER_EQUAL 7.3.0)
+                list(APPEND C_FLAGS -Wdouble-promotion)
+            endif()
+        endif()
+    endif()
+
+    set(GF_C_FLAGS ${C_FLAGS} PARENT_SCOPE)
+    set(GF_CXX_FLAGS ${CXX_FLAGS} PARENT_SCOPE)
+endfunction()
+
if (LLAMA_ALL_WARNINGS)
if (NOT MSVC)
- set(warning_flags -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function)
- set(c_flags -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int -Werror=implicit-function-declaration)
- set(cxx_flags -Wmissing-declarations -Wmissing-noreturn)
- set(host_cxx_flags "")
-
- if (CMAKE_C_COMPILER_ID MATCHES "Clang")
- set(warning_flags ${warning_flags} -Wunreachable-code-break -Wunreachable-code-return)
- set(host_cxx_flags ${host_cxx_flags} -Wmissing-prototypes -Wextra-semi)
-
- if (
- (CMAKE_C_COMPILER_ID STREQUAL "Clang" AND CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL 3.8.0) OR
- (CMAKE_C_COMPILER_ID STREQUAL "AppleClang" AND CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL 7.3.0)
- )
- set(c_flags ${c_flags} -Wdouble-promotion)
- endif()
- elseif (CMAKE_C_COMPILER_ID STREQUAL "GNU")
- set(c_flags ${c_flags} -Wdouble-promotion)
- set(host_cxx_flags ${host_cxx_flags} -Wno-array-bounds)
+ set(WARNING_FLAGS -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function)
+ set(C_FLAGS -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes
+ -Werror=implicit-int -Werror=implicit-function-declaration)
+ set(CXX_FLAGS -Wmissing-declarations -Wmissing-noreturn)
- if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 7.1.0)
- set(host_cxx_flags ${host_cxx_flags} -Wno-format-truncation)
- endif()
- if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 8.1.0)
- set(host_cxx_flags ${host_cxx_flags} -Wextra-semi)
- endif()
- endif()
+        # generic warnings first, then the language-specific ones
+        set(C_FLAGS ${WARNING_FLAGS} ${C_FLAGS})
+        set(CXX_FLAGS ${WARNING_FLAGS} ${CXX_FLAGS})
+
+        # Quote both arguments: an empty compiler id or version would otherwise
+        # vanish from the call and get_flags() would be invoked with too few arguments.
+        get_flags("${CMAKE_CXX_COMPILER_ID}" "${CMAKE_CXX_COMPILER_VERSION}")
+
+        # get_flags() returns the compiler-specific additions in GF_C_FLAGS / GF_CXX_FLAGS
+        add_compile_options("$<$<COMPILE_LANGUAGE:C>:${C_FLAGS};${GF_C_FLAGS}>"
+                            "$<$<COMPILE_LANGUAGE:CXX>:${CXX_FLAGS};${GF_CXX_FLAGS}>")
else()
# todo : msvc
+ set(C_FLAGS "")
+ set(CXX_FLAGS "")
endif()
+endif()
- set(c_flags ${c_flags} ${warning_flags})
- set(cxx_flags ${cxx_flags} ${warning_flags})
- add_compile_options("$<$<COMPILE_LANGUAGE:C>:${c_flags}>"
- "$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags}>"
- "$<$<COMPILE_LANGUAGE:CXX>:${host_cxx_flags}>")
+if (LLAMA_CUBLAS)
+    # Options handed to nvcc: the C++ warning set (empty when no warnings were
+    # configured) plus fast math; -Wno-pedantic is added for non-MSVC hosts.
+    set(CUDA_FLAGS ${CXX_FLAGS} -use_fast_math)
+    if (NOT MSVC)
+        list(APPEND CUDA_FLAGS -Wno-pedantic)
+    endif()
-endif()
+
+    if (LLAMA_ALL_WARNINGS AND NOT MSVC)
+        # Probe the host compiler that nvcc drives, so that the warning flags
+        # match it rather than the project's C++ compiler.
+        # NOTE(review): ".c" looks like a placeholder input file name so that
+        # nvcc forwards the query to the host compiler - confirm.
+        set(NVCC_CMD ${CMAKE_CUDA_COMPILER} .c)
+        # Quoted expansion: an undefined variable must count as "empty" here,
+        # otherwise a bare -ccbin would swallow the next option.
+        if (NOT "${CMAKE_CUDA_HOST_COMPILER}" STREQUAL "")
+            list(APPEND NVCC_CMD -ccbin ${CMAKE_CUDA_HOST_COMPILER})
+        endif()
-if (NOT MSVC)
- set(cuda_flags -Wno-pedantic)
-endif()
-set(cuda_flags ${cxx_flags} -use_fast_math ${cuda_flags})
+
+        execute_process(
+            COMMAND ${NVCC_CMD} -Xcompiler --version
+            OUTPUT_VARIABLE CUDA_CCFULLVER
+            ERROR_QUIET
+        )
-list(JOIN host_cxx_flags " " cuda_host_flags) # pass host compiler flags as a single argument
-if (NOT cuda_host_flags STREQUAL "")
- set(cuda_flags ${cuda_flags} -Xcompiler ${cuda_host_flags})
-endif()
+
+        # Anything whose version banner does not mention clang is treated as gcc.
+        if (NOT CUDA_CCFULLVER MATCHES clang)
+            set(CUDA_CCID "GNU")
+            execute_process(
+                COMMAND ${NVCC_CMD} -Xcompiler "-dumpfullversion -dumpversion"
+                OUTPUT_VARIABLE CUDA_CCVER
+                OUTPUT_STRIP_TRAILING_WHITESPACE  # drop the newline after the version number
+                ERROR_QUIET
+            )
+        else()
+            if (CUDA_CCFULLVER MATCHES Apple)
+                set(CUDA_CCID "AppleClang")
+            else()
+                set(CUDA_CCID "Clang")
+            endif()
+            # Quoted so the banner is passed as one string even if it contains ';'.
+            string(REGEX REPLACE "^.* version ([0-9.]*).*$" "\\1" CUDA_CCVER "${CUDA_CCFULLVER}")
+        endif()
+
+        message(STATUS "CUDA host compiler is ${CUDA_CCID} ${CUDA_CCVER}")
-add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:${cuda_flags}>")
+
+        # Quoted: a failed probe leaves CUDA_CCVER empty, and an unquoted empty
+        # argument would make this call fail with a wrong argument count.
+        get_flags("${CUDA_CCID}" "${CUDA_CCVER}")
+        list(JOIN GF_CXX_FLAGS " " CUDA_CXX_FLAGS)  # pass host compiler flags as a single argument
+        if (NOT CUDA_CXX_FLAGS STREQUAL "")
+            list(APPEND CUDA_FLAGS -Xcompiler "${CUDA_CXX_FLAGS}")
+        endif()
+    endif()
+
+    add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:${CUDA_FLAGS}>")
+endif()
if (WIN32)
add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
execute_process(
COMMAND ${CMAKE_C_COMPILER} ${CMAKE_EXE_LINKER_FLAGS} -Wl,-v
ERROR_VARIABLE output
+ OUTPUT_QUIET
)
if (output MATCHES "dyld-1015\.7")
add_compile_definitions(HAVE_BUGGY_APPLE_LINKER)
UNAME_M := $(shell uname -m)
endif
-ifeq '' '$(findstring clang,$(shell $(CC) --version))'
- CC_IS_GCC=1
- CC_VER := $(shell $(CC) -dumpfullversion -dumpversion | awk -F. '{ printf("%02d%02d%02d", $$1, $$2, $$3) }')
-else
- CC_IS_CLANG=1
- ifeq '' '$(findstring Apple,$(shell $(CC) --version))'
- CC_IS_LLVM_CLANG=1
- else
- CC_IS_APPLE_CLANG=1
- endif
- CC_VER := $(shell $(CC) --version | sed -n 's/^.* version \([0-9.]*\).*$$/\1/p' \
- | awk -F. '{ printf("%02d%02d%02d", $$1, $$2, $$3) }')
-endif
-
# Mac OS + Arm can report x86_64
# ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
ifeq ($(UNAME_S),Darwin)
# -Ofast tends to produce faster code, but may not be available for some compilers.
ifdef LLAMA_FAST
-MK_CFLAGS += -Ofast
-MK_HOST_CXXFLAGS += -Ofast
-MK_CUDA_CXXFLAGS += -O3
+MK_CFLAGS += -Ofast
+HOST_CXXFLAGS += -Ofast
+MK_NVCCFLAGS += -O3
else
-MK_CFLAGS += -O3
-MK_CXXFLAGS += -O3
+MK_CFLAGS += -O3
+MK_CXXFLAGS += -O3
endif
# clock_gettime came in POSIX.1b (1993)
-Werror=implicit-function-declaration
MK_CXXFLAGS += $(WARN_FLAGS) -Wmissing-declarations -Wmissing-noreturn
-ifeq ($(CC_IS_CLANG), 1)
- # clang options
- MK_CFLAGS += -Wunreachable-code-break -Wunreachable-code-return
- MK_HOST_CXXFLAGS += -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi
-
- ifneq '' '$(and $(CC_IS_LLVM_CLANG),$(filter 1,$(shell expr $(CC_VER) \>= 030800)))'
- MK_CFLAGS += -Wdouble-promotion
- endif
- ifneq '' '$(and $(CC_IS_APPLE_CLANG),$(filter 1,$(shell expr $(CC_VER) \>= 070300)))'
- MK_CFLAGS += -Wdouble-promotion
- endif
-else
- # gcc options
- MK_CFLAGS += -Wdouble-promotion
- MK_HOST_CXXFLAGS += -Wno-array-bounds
-
- ifeq ($(shell expr $(CC_VER) \>= 070100), 1)
- MK_HOST_CXXFLAGS += -Wno-format-truncation
- endif
- ifeq ($(shell expr $(CC_VER) \>= 080100), 1)
- MK_HOST_CXXFLAGS += -Wextra-semi
- endif
-endif
-
# this version of Apple ld64 is buggy
ifneq '' '$(findstring dyld-1015.7,$(shell $(CC) $(LDFLAGS) -Wl,-v 2>&1))'
MK_CPPFLAGS += -DHAVE_BUGGY_APPLE_LINKER
ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64))
# Use all CPU extensions that are available:
- MK_CFLAGS += -march=native -mtune=native
- MK_HOST_CXXFLAGS += -march=native -mtune=native
+ MK_CFLAGS += -march=native -mtune=native
+ HOST_CXXFLAGS += -march=native -mtune=native
# Usage AVX-only
#MK_CFLAGS += -mfma -mf16c -mavx
MK_CPPFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
MK_LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib
OBJS += ggml-cuda.o
- NVCCFLAGS = --forward-unknown-to-host-compiler -use_fast_math
+ # append instead of assigning: MK_NVCCFLAGS may already carry -O3 from the LLAMA_FAST section
+ MK_NVCCFLAGS += --forward-unknown-to-host-compiler -use_fast_math
ifdef LLAMA_DEBUG
- NVCCFLAGS += -lineinfo
+ MK_NVCCFLAGS += -lineinfo
endif
ifdef LLAMA_CUDA_NVCC
NVCC = nvcc
endif #LLAMA_CUDA_NVCC
ifdef CUDA_DOCKER_ARCH
- NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH)
-else ifdef CUDA_POWER_ARCH
- NVCCFLAGS +=
-else
- NVCCFLAGS += -arch=native
+ MK_NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH)
+else ifndef CUDA_POWER_ARCH
+ MK_NVCCFLAGS += -arch=native
endif # CUDA_DOCKER_ARCH
ifdef LLAMA_CUDA_FORCE_DMMV
- NVCCFLAGS += -DGGML_CUDA_FORCE_DMMV
+ MK_NVCCFLAGS += -DGGML_CUDA_FORCE_DMMV
endif # LLAMA_CUDA_FORCE_DMMV
ifdef LLAMA_CUDA_FORCE_MMQ
- NVCCFLAGS += -DGGML_CUDA_FORCE_MMQ
+ MK_NVCCFLAGS += -DGGML_CUDA_FORCE_MMQ
endif # LLAMA_CUDA_FORCE_MMQ
ifdef LLAMA_CUDA_DMMV_X
- NVCCFLAGS += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X)
+ MK_NVCCFLAGS += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X)
else
- NVCCFLAGS += -DGGML_CUDA_DMMV_X=32
+ MK_NVCCFLAGS += -DGGML_CUDA_DMMV_X=32
endif # LLAMA_CUDA_DMMV_X
ifdef LLAMA_CUDA_MMV_Y
- NVCCFLAGS += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y)
+ MK_NVCCFLAGS += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y)
else ifdef LLAMA_CUDA_DMMV_Y
- NVCCFLAGS += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_DMMV_Y) # for backwards compatibility
+ MK_NVCCFLAGS += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_DMMV_Y) # for backwards compatibility
else
- NVCCFLAGS += -DGGML_CUDA_MMV_Y=1
+ MK_NVCCFLAGS += -DGGML_CUDA_MMV_Y=1
endif # LLAMA_CUDA_MMV_Y
ifdef LLAMA_CUDA_F16
- NVCCFLAGS += -DGGML_CUDA_F16
+ MK_NVCCFLAGS += -DGGML_CUDA_F16
endif # LLAMA_CUDA_F16
ifdef LLAMA_CUDA_DMMV_F16
- NVCCFLAGS += -DGGML_CUDA_F16
+ MK_NVCCFLAGS += -DGGML_CUDA_F16
endif # LLAMA_CUDA_DMMV_F16
ifdef LLAMA_CUDA_KQUANTS_ITER
- NVCCFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
+ MK_NVCCFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
else
- NVCCFLAGS += -DK_QUANTS_PER_ITERATION=2
+ MK_NVCCFLAGS += -DK_QUANTS_PER_ITERATION=2
endif
ifdef LLAMA_CUDA_PEER_MAX_BATCH_SIZE
- NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=$(LLAMA_CUDA_PEER_MAX_BATCH_SIZE)
+ MK_NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=$(LLAMA_CUDA_PEER_MAX_BATCH_SIZE)
else
- NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128
+ MK_NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128
endif # LLAMA_CUDA_PEER_MAX_BATCH_SIZE
#ifdef LLAMA_CUDA_CUBLAS
-# NVCCFLAGS += -DGGML_CUDA_CUBLAS
+# MK_NVCCFLAGS += -DGGML_CUDA_CUBLAS
#endif # LLAMA_CUDA_CUBLAS
ifdef LLAMA_CUDA_CCBIN
- NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN)
+ MK_NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN)
endif
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
- $(NVCC) $(NVCCFLAGS) -c $< -o $@
+ $(NVCC) $(BASE_CXXFLAGS) $(NVCCFLAGS) -Wno-pedantic -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
endif # LLAMA_CUBLAS
ifdef LLAMA_CLBLAST
$(CC) $(CFLAGS) -c $< -o $@
endif # LLAMA_MPI
+GF_CC := $(CC)
+include scripts/get-flags.mk
+
# combine build flags with cmdline overrides
-override CFLAGS := $(MK_CPPFLAGS) $(CPPFLAGS) $(MK_CFLAGS) $(CFLAGS)
-override CXXFLAGS := $(MK_CPPFLAGS) $(CPPFLAGS) $(MK_CXXFLAGS) $(CXXFLAGS)
-override CUDA_CXXFLAGS := $(MK_CUDA_CXXFLAGS) $(CUDA_CXXFLAGS)
-override HOST_CXXFLAGS := $(MK_HOST_CXXFLAGS) $(HOST_CXXFLAGS)
-override LDFLAGS := $(MK_LDFLAGS) $(LDFLAGS)
-
-# save CXXFLAGS before we add host-only options
-NVCCFLAGS := $(NVCCFLAGS) $(CXXFLAGS) $(CUDA_CXXFLAGS) -Wno-pedantic -Xcompiler "$(HOST_CXXFLAGS)"
-override CXXFLAGS += $(HOST_CXXFLAGS)
+override CFLAGS := $(MK_CPPFLAGS) $(CPPFLAGS) $(MK_CFLAGS) $(GF_CFLAGS) $(CFLAGS)
+BASE_CXXFLAGS := $(MK_CPPFLAGS) $(CPPFLAGS) $(MK_CXXFLAGS) $(CXXFLAGS)
+override CXXFLAGS := $(BASE_CXXFLAGS) $(HOST_CXXFLAGS) $(GF_CXXFLAGS)
+override NVCCFLAGS := $(MK_NVCCFLAGS) $(NVCCFLAGS)
+override LDFLAGS := $(MK_LDFLAGS) $(LDFLAGS)
+
+# identify CUDA host compiler
+#
+# get-flags.mk expands $(GF_CC) inside $(shell ...) and appends options such as
+# --version after it. Ending GF_CC with -Xcompiler therefore makes nvcc hand
+# that appended option to its host compiler, and the embedded 2>/dev/null
+# discards whatever nvcc prints on stderr while doing so.
+# NOTE(review): ".c" looks like a placeholder input file name for nvcc - confirm.
+# The warning flags detected for the host compiler end up in CUDA_CXXFLAGS.
+ifdef LLAMA_CUBLAS
+GF_CC := $(NVCC) $(NVCCFLAGS) 2>/dev/null .c -Xcompiler
+include scripts/get-flags.mk
+CUDA_CXXFLAGS := $(GF_CXXFLAGS)
+endif
#
# Print build information
--- /dev/null
+# Detect which compiler $(GF_CC) runs and select matching warning options.
+#
+# Input:  GF_CC       - command line used to query the compiler
+# Output: GF_CFLAGS   - extra warning options for C
+#         GF_CXXFLAGS - extra warning options for C++
+#
+# This file can be included more than once with different GF_CC values, so
+# the detection results of an earlier inclusion are cleared first. Without
+# this, a clang result from one inclusion would still be visible after a
+# later inclusion detected gcc, and the clang options would be selected.
+GF_CC_IS_GCC         :=
+GF_CC_IS_CLANG       :=
+GF_CC_IS_LLVM_CLANG  :=
+GF_CC_IS_APPLE_CLANG :=
+
+# run the version query once; both checks below only search the banner text
+GF_CC_BANNER := $(shell $(GF_CC) --version)
+
+# anything whose banner does not mention clang is treated as gcc
+ifeq '' '$(findstring clang,$(GF_CC_BANNER))'
+    GF_CC_IS_GCC = 1
+    # version as a zero-padded six digit number (major, minor, patch) for use with expr
+    GF_CC_VER := $(shell { $(GF_CC) -dumpfullversion 2>/dev/null || $(GF_CC) -dumpversion; } | awk -F. '{ printf("%02d%02d%02d", $$1, $$2, $$3) }')
+else
+    GF_CC_IS_CLANG = 1
+    ifeq '' '$(findstring Apple,$(GF_CC_BANNER))'
+        GF_CC_IS_LLVM_CLANG = 1
+    else
+        GF_CC_IS_APPLE_CLANG = 1
+    endif
+    # same six digit encoding, taken from the "version X.Y.Z" text of the banner
+    GF_CC_VER := \
+        $(shell $(GF_CC) --version | sed -n 's/^.* version \([0-9.]*\).*$$/\1/p' \
+            | awk -F. '{ printf("%02d%02d%02d", $$1, $$2, $$3) }')
+endif
+
+ifeq ($(GF_CC_IS_CLANG), 1)
+    # clang options
+    GF_CFLAGS   = -Wunreachable-code-break -Wunreachable-code-return
+    GF_CXXFLAGS = -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi
+
+    # -Wdouble-promotion has a different minimum version for LLVM clang and Apple clang
+    ifneq '' '$(and $(GF_CC_IS_LLVM_CLANG),$(filter 1,$(shell expr $(GF_CC_VER) \>= 030800)))'
+        GF_CFLAGS += -Wdouble-promotion
+    endif
+    ifneq '' '$(and $(GF_CC_IS_APPLE_CLANG),$(filter 1,$(shell expr $(GF_CC_VER) \>= 070300)))'
+        GF_CFLAGS += -Wdouble-promotion
+    endif
+else
+    # gcc options
+    GF_CFLAGS   = -Wdouble-promotion
+    GF_CXXFLAGS = -Wno-array-bounds
+
+    ifeq ($(shell expr $(GF_CC_VER) \>= 070100), 1)
+        GF_CXXFLAGS += -Wno-format-truncation
+    endif
+    ifeq ($(shell expr $(GF_CC_VER) \>= 080100), 1)
+        GF_CXXFLAGS += -Wextra-semi
+    endif
+endif