find_package(Threads REQUIRED)
include(CheckCXXCompilerFlag)
-if (LLAMA_FATAL_WARNINGS)
- if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
- add_compile_options(-Werror)
- elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
- add_compile_options(/WX)
- endif()
-endif()
-
# enable libstdc++ assertions for debug builds
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
add_compile_definitions($<$<CONFIG:Debug>:_GLIBCXX_ASSERTIONS>)
set(GF_CXX_FLAGS ${CXX_FLAGS} PARENT_SCOPE)
endfunction()
+if (LLAMA_FATAL_WARNINGS)
+ if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+ list(APPEND C_FLAGS -Werror)
+ list(APPEND CXX_FLAGS -Werror)
+ elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
+ add_compile_options(/WX)
+ endif()
+endif()
+
if (LLAMA_ALL_WARNINGS)
if (NOT MSVC)
- set(WARNING_FLAGS -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function)
- set(C_FLAGS -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes
- -Werror=implicit-int -Werror=implicit-function-declaration)
- set(CXX_FLAGS -Wmissing-declarations -Wmissing-noreturn)
+ list(APPEND WARNING_FLAGS -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function)
+ list(APPEND C_FLAGS -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes
+ -Werror=implicit-int -Werror=implicit-function-declaration)
+ list(APPEND CXX_FLAGS -Wmissing-declarations -Wmissing-noreturn)
- set(C_FLAGS ${WARNING_FLAGS} ${C_FLAGS})
- set(CXX_FLAGS ${WARNING_FLAGS} ${CXX_FLAGS})
+ list(APPEND C_FLAGS ${WARNING_FLAGS})
+ list(APPEND CXX_FLAGS ${WARNING_FLAGS})
get_flags(${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION})
if (LLAMA_CUBLAS)
set(CUDA_FLAGS -use_fast_math)
+ if (LLAMA_FATAL_WARNINGS)
+ list(APPEND CUDA_FLAGS -Werror all-warnings)
+ endif()
+
if (LLAMA_ALL_WARNINGS AND NOT MSVC)
set(NVCC_CMD ${CMAKE_CUDA_COMPILER} .c)
if (NOT CMAKE_CUDA_HOST_COMPILER STREQUAL "")
MK_CXXFLAGS += $(WARN_FLAGS) -Wmissing-declarations -Wmissing-noreturn
ifeq ($(LLAMA_FATAL_WARNINGS),1)
- MK_CFLAGS += -Werror
+ MK_CFLAGS += -Werror
MK_CXXFLAGS += -Werror
endif
MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib -L/usr/local/cuda/targets/aarch64-linux/lib -L/usr/lib/wsl/lib
OBJS += ggml-cuda.o
MK_NVCCFLAGS += -use_fast_math
+ifeq ($(LLAMA_FATAL_WARNINGS),1)
+	MK_NVCCFLAGS += -Werror all-warnings
+endif # LLAMA_FATAL_WARNINGS
ifndef JETSON_EOL_MODULE_DETECT
MK_NVCCFLAGS += --forward-unknown-to-host-compiler
endif # JETSON_EOL_MODULE_DETECT
return a;
}
-static __device__ __forceinline__ half2 warp_reduce_sum(half2 a) {
-#if !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL
-#pragma unroll
- for (int mask = 16; mask > 0; mask >>= 1) {
- a = __hadd2(a, __shfl_xor_sync(0xffffffff, a, mask, 32));
- }
- return a;
-#else
- (void) a;
- NO_DEVICE_CODE;
-#endif // !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL
-}
+//static __device__ __forceinline__ half2 warp_reduce_sum(half2 a) {
+//#if !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL
+//#pragma unroll
+// for (int mask = 16; mask > 0; mask >>= 1) {
+// a = __hadd2(a, __shfl_xor_sync(0xffffffff, a, mask, 32));
+// }
+// return a;
+//#else
+// (void) a;
+// NO_DEVICE_CODE;
+//#endif // !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL
+//}
static __device__ __forceinline__ float warp_reduce_max(float x) {
#pragma unroll
return x;
}
-static __device__ __forceinline__ half2 warp_reduce_max(half2 x) {
-#if !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL && CUDART_VERSION >= CUDART_HMAX
-#pragma unroll
- for (int mask = 16; mask > 0; mask >>= 1) {
- x = __hmax2(x, __shfl_xor_sync(0xffffffff, x, mask, 32));
- }
- return x;
-#else
- (void) x;
- NO_DEVICE_CODE;
-#endif // !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL && CUDART_VERSION >= CUDART_HMAX
-}
+//static __device__ __forceinline__ half2 warp_reduce_max(half2 x) {
+//#if !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL && CUDART_VERSION >= CUDART_HMAX
+//#pragma unroll
+// for (int mask = 16; mask > 0; mask >>= 1) {
+// x = __hmax2(x, __shfl_xor_sync(0xffffffff, x, mask, 32));
+// }
+// return x;
+//#else
+// (void) x;
+// NO_DEVICE_CODE;
+//#endif // !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL && CUDART_VERSION >= CUDART_HMAX
+//}
static __device__ __forceinline__ float op_repeat(const float a, const float b) {
return b;
const float d = (float)bq2->d * __low2float(bq8_1[ib32].ds) * 0.25f;
return d * ((0.5f + ls1) * sumi1 + (0.5f + ls2) * sumi2);
#else
+ (void) ksigns64;
assert(false);
return 0.f;
#endif
#else
+ (void) ksigns64;
assert(false);
return 0.f;
#endif