set(LLAMA_METAL_DEFAULT OFF)
endif()
+# TODO: fix this for Android CI
+# https://github.com/ggerganov/llama.cpp/pull/6716#issuecomment-2061509191
+#if (CMAKE_SYSTEM_NAME MATCHES "ANDROID")
+# set(LLAMA_LLAMAFILE_DEFAULT OFF)
+#else()
+# set(LLAMA_LLAMAFILE_DEFAULT ON)
+#endif()
+
+# TODO: temporarily disabled until MoE is fixed
+# https://github.com/ggerganov/llama.cpp/pull/6716
+set(LLAMA_LLAMAFILE_DEFAULT OFF)
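+# LLAMA_LLAMAFILE_DEFAULT only seeds the option below, so llamafile can still
+# be enabled explicitly at configure time, e.g. with -DLLAMA_LLAMAFILE=ON.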
+
# general
option(BUILD_SHARED_LIBS "build shared libraries" OFF)
option(LLAMA_STATIC "llama: static link libraries" OFF)
# 3rd party libs
option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON)
option(LLAMA_BLAS "llama: use BLAS" OFF)
-option(LLAMA_LLAMAFILE "llama: use llamafile SGEMM" ON)
+option(LLAMA_LLAMAFILE "llama: use llamafile SGEMM" ${LLAMA_LLAMAFILE_DEFAULT})
set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor")
option(LLAMA_CUDA "llama: use CUDA" OFF)
option(LLAMA_CUBLAS "llama: use CUDA (deprecated, use LLAMA_CUDA)" OFF)
if (LLAMA_LLAMAFILE)
add_compile_definitions(GGML_USE_LLAMAFILE)
+
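+    # These variables are appended to the ggml source list below; when
+    # LLAMA_LLAMAFILE is OFF they remain unset and sgemm.cpp is not compiled.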
+ set(GGML_HEADERS_LLAMAFILE sgemm.h)
+ set(GGML_SOURCES_LLAMAFILE sgemm.cpp)
endif()
if (LLAMA_QKK_64)
ggml-backend.h
ggml-quants.c
ggml-quants.h
- sgemm.cpp
- sgemm.h
- ${GGML_SOURCES_CUDA} ${GGML_HEADERS_CUDA}
- ${GGML_SOURCES_OPENCL} ${GGML_HEADERS_OPENCL}
- ${GGML_SOURCES_METAL} ${GGML_HEADERS_METAL}
- ${GGML_SOURCES_MPI} ${GGML_HEADERS_MPI}
- ${GGML_SOURCES_EXTRA} ${GGML_HEADERS_EXTRA}
- ${GGML_SOURCES_SYCL} ${GGML_HEADERS_SYCL}
- ${GGML_SOURCES_KOMPUTE} ${GGML_HEADERS_KOMPUTE}
- ${GGML_SOURCES_VULKAN} ${GGML_HEADERS_VULKAN}
- ${GGML_SOURCES_ROCM} ${GGML_HEADERS_ROCM}
+ ${GGML_SOURCES_CUDA} ${GGML_HEADERS_CUDA}
+ ${GGML_SOURCES_OPENCL} ${GGML_HEADERS_OPENCL}
+ ${GGML_SOURCES_METAL} ${GGML_HEADERS_METAL}
+ ${GGML_SOURCES_MPI} ${GGML_HEADERS_MPI}
+ ${GGML_SOURCES_EXTRA} ${GGML_HEADERS_EXTRA}
+ ${GGML_SOURCES_SYCL} ${GGML_HEADERS_SYCL}
+ ${GGML_SOURCES_KOMPUTE} ${GGML_HEADERS_KOMPUTE}
+ ${GGML_SOURCES_VULKAN} ${GGML_HEADERS_VULKAN}
+ ${GGML_SOURCES_ROCM} ${GGML_HEADERS_ROCM}
+ ${GGML_SOURCES_LLAMAFILE} ${GGML_HEADERS_LLAMAFILE}
)
target_include_directories(ggml PUBLIC . ${LLAMA_EXTRA_INCLUDES})
MK_CPPFLAGS += -DLOG_DISABLE_LOGS
endif # LLAMA_DISABLE_LOGS
-# disable ggml.c's use of sgemm.cpp
-ifdef LLAMA_NO_LLAMAFILE
- MK_CPPFLAGS += -DGGML_USE_LLAMAFILE=0
-else
- MK_CPPFLAGS += -DGGML_USE_LLAMAFILE=1
-endif
-
# warnings
WARN_FLAGS = -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
MK_CFLAGS += $(WARN_FLAGS) -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int \
MK_LDFLAGS += $(shell pkg-config --libs openblas)
endif # LLAMA_OPENBLAS
+# TODO: temporarily disabled until MoE is fixed
+# https://github.com/ggerganov/llama.cpp/pull/6716
+LLAMA_NO_LLAMAFILE := 1
+
+ifndef LLAMA_NO_LLAMAFILE
+ MK_CPPFLAGS += -DGGML_USE_LLAMAFILE
+ OBJS += sgemm.o
+endif
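+# Once MoE is fixed, removing the forced LLAMA_NO_LLAMAFILE above restores the
+# default llamafile build (GGML_USE_LLAMAFILE plus the sgemm.o rule below).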
+
ifdef LLAMA_BLIS
MK_CPPFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/blis -I/usr/include/blis
MK_LDFLAGS += -lblis -L/usr/local/lib
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h ggml.h ggml-backend.h ggml-backend-impl.h ggml-common.h $(wildcard ggml-cuda/*.cuh)
$(NVCC_COMPILE)
-
endif # LLAMA_CUDA
ifdef LLAMA_CLBLAST
-
MK_CPPFLAGS += -DGGML_USE_CLBLAST $(shell pkg-config --cflags-only-I clblast OpenCL)
MK_CFLAGS += $(shell pkg-config --cflags-only-other clblast OpenCL)
MK_CXXFLAGS += $(shell pkg-config --cflags-only-other clblast OpenCL)
$(CC) $(CFLAGS) -c $< -o $@
endif # LLAMA_MPI
+ifndef LLAMA_NO_LLAMAFILE
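+# ggml.c dispatches to these llamafile SGEMM kernels when GGML_USE_LLAMAFILE is
+# defined, so the object is only built when llamafile support is enabled.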
+sgemm.o: sgemm.cpp sgemm.h ggml.h
+ $(CXX) $(CXXFLAGS) -c $< -o $@
+endif
+
GF_CC := $(CC)
include scripts/get-flags.mk
ggml-quants.o: ggml-quants.c ggml.h ggml-quants.h ggml-common.h
$(CC) $(CFLAGS) -c $< -o $@
-sgemm.o: sgemm.cpp sgemm.h ggml.h
- $(CXX) $(CXXFLAGS) -c $< -o $@
-
unicode.o: unicode.cpp unicode.h
$(CXX) $(CXXFLAGS) -c $< -o $@
unicode-data.o: unicode-data.cpp unicode-data.h
$(CXX) $(CXXFLAGS) -c $< -o $@
-OBJS += ggml-alloc.o ggml-backend.o ggml-quants.o unicode.o unicode-data.o sgemm.o
+OBJS += ggml-alloc.o ggml-backend.o ggml-quants.o unicode.o unicode-data.o
llama.o: llama.cpp unicode.h ggml.h ggml-alloc.h ggml-backend.h ggml-cuda.h ggml-metal.h llama.h
$(CXX) $(CXXFLAGS) -c $< -o $@