*.o
*.a
+*.d
.cache/
.coreml/
.test/
.swiftpm
*.metallib
+ggml-metal-embed.metal
+ggml-metal-embed.metal.tmp
+
/main
/stream
/command
else
MK_CFLAGS += -march=rv64gcv -mabi=lp64d
MK_CXXFLAGS += -march=rv64gcv -mabi=lp64d
-endif
+endif # RISCV
ifndef GGML_NO_ACCELERATE
# Mac OS - include Accelerate framework.
# `-framework Accelerate` works both with Apple Silicon and Mac Intel
ifeq ($(UNAME_S),Darwin)
- MK_CPPFLAGS += -DGGML_USE_ACCELERATE -DGGML_USE_BLAS
+ MK_CPPFLAGS += -DGGML_USE_ACCELERATE -DGGML_USE_BLAS -DGGML_BLAS_USE_ACCELERATE
MK_CPPFLAGS += -DACCELERATE_NEW_LAPACK
MK_CPPFLAGS += -DACCELERATE_LAPACK_ILP64
MK_LDFLAGS += -framework Accelerate
- OBJ_GGML += ggml/src/ggml-blas.o
+ OBJ_GGML += ggml/src/ggml-blas/ggml-blas.o
endif
endif # GGML_NO_ACCELERATE
MK_CXXFLAGS += -fopenmp
endif # GGML_NO_OPENMP
+ifdef WHISPER_COREML
+ MK_CXXFLAGS += -DWHISPER_USE_COREML
+ LDFLAGS += -framework Foundation -framework CoreML
+
+ifdef WHISPER_COREML_ALLOW_FALLBACK
+ MK_CXXFLAGS += -DWHISPER_COREML_ALLOW_FALLBACK
+endif
+endif # WHISPER_COREML
+
ifdef GGML_OPENBLAS
MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas)
MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas)
MK_LDFLAGS += $(shell pkg-config --libs openblas)
- OBJ_GGML += ggml/src/ggml-blas.o
+ OBJ_GGML += ggml/src/ggml-blas/ggml-blas.o
endif # GGML_OPENBLAS
ifdef GGML_OPENBLAS64
MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas64)
MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas64)
MK_LDFLAGS += $(shell pkg-config --libs openblas64)
- OBJ_GGML += ggml/src/ggml-blas.o
+ OBJ_GGML += ggml/src/ggml-blas/ggml-blas.o
endif # GGML_OPENBLAS64
ifdef GGML_BLIS
MK_CPPFLAGS += -DGGML_USE_BLAS -I/usr/local/include/blis -I/usr/include/blis
MK_LDFLAGS += -lblis -L/usr/local/lib
- OBJ_GGML += ggml/src/ggml-blas.o
+ OBJ_GGML += ggml/src/ggml-blas/ggml-blas.o
endif # GGML_BLIS
ifdef GGML_RPC
MK_CPPFLAGS += -DGGML_USE_RPC
- OBJ_GGML += ggml/src/ggml-rpc.o
+ OBJ_GGML += ggml/src/ggml-rpc/ggml-rpc.o
endif # GGML_RPC
OBJ_CUDA_TMPL = $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-wmma*.cu))
MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib
MK_NVCCFLAGS += -use_fast_math
- OBJ_GGML += ggml/src/ggml-cuda.o
+ OBJ_GGML += ggml/src/ggml-cuda/ggml-cuda.o
OBJ_GGML += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu))
OBJ_GGML += $(OBJ_CUDA_TMPL)
ifdef WHISPER_FATAL_WARNINGS
ggml/src/ggml-cuda/common.cuh
$(NVCC_COMPILE)
-ggml/src/ggml-cuda.o: \
- ggml/src/ggml-cuda.cu \
+ggml/src/ggml-cuda/ggml-cuda.o: \
+ ggml/src/ggml-cuda/ggml-cuda.cu \
+ ggml/include/ggml-cuda.h \
ggml/include/ggml.h \
ggml/include/ggml-backend.h \
- ggml/include/ggml-cuda.h \
ggml/src/ggml-backend-impl.h \
ggml/src/ggml-common.h \
$(wildcard ggml/src/ggml-cuda/*.cuh)
ifdef GGML_METAL
MK_CPPFLAGS += -DGGML_USE_METAL
MK_LDFLAGS += -framework Foundation -framework Metal -framework MetalKit
- OBJ_GGML += ggml/src/ggml-metal.o
+ OBJ_GGML += ggml/src/ggml-metal/ggml-metal.o
ifdef GGML_METAL_NDEBUG
MK_CPPFLAGS += -DGGML_METAL_NDEBUG
endif
ifdef GGML_METAL_EMBED_LIBRARY
MK_CPPFLAGS += -DGGML_METAL_EMBED_LIBRARY
- OBJ_GGML += ggml/src/ggml-metal-embed.o
+ OBJ_GGML += ggml/src/ggml-metal/ggml-metal-embed.o
endif
endif # GGML_METAL
-ifdef WHISPER_COREML
- MK_CXXFLAGS += -DWHISPER_USE_COREML
- LDFLAGS += -framework Foundation -framework CoreML
-
-ifdef WHISPER_COREML_ALLOW_FALLBACK
- MK_CXXFLAGS += -DWHISPER_COREML_ALLOW_FALLBACK
-endif
-endif
-
-# ===
-
ifdef GGML_METAL
-ggml/src/ggml-metal.o: \
- ggml/src/ggml-metal.m \
+ggml/src/ggml-metal/ggml-metal.o: \
+ ggml/src/ggml-metal/ggml-metal.m \
+ ggml/src/ggml-metal/ggml-metal-impl.h \
ggml/include/ggml-metal.h \
ggml/include/ggml.h
$(CC) $(CFLAGS) -c $< -o $@
ifdef GGML_METAL_EMBED_LIBRARY
-ggml/src/ggml-metal-embed.o: \
- ggml/src/ggml-metal.metal \
+ggml/src/ggml-metal/ggml-metal-embed.o: \
+ ggml/src/ggml-metal/ggml-metal.metal \
+ ggml/src/ggml-metal/ggml-metal-impl.h \
ggml/src/ggml-common.h
@echo "Embedding Metal library"
- @sed -e '/#include "ggml-common.h"/r ggml/src/ggml-common.h' -e '/#include "ggml-common.h"/d' < ggml/src/ggml-metal.metal > ggml/src/ggml-metal-embed.metal
- $(eval TEMP_ASSEMBLY=$(shell mktemp))
- @echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY)
- @echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY)
- @echo "_ggml_metallib_start:" >> $(TEMP_ASSEMBLY)
- @echo ".incbin \"ggml/src/ggml-metal-embed.metal\"" >> $(TEMP_ASSEMBLY)
- @echo ".globl _ggml_metallib_end" >> $(TEMP_ASSEMBLY)
- @echo "_ggml_metallib_end:" >> $(TEMP_ASSEMBLY)
- @$(AS) $(TEMP_ASSEMBLY) -o $@
- @rm -f ${TEMP_ASSEMBLY}
+ @sed -e '/__embed_ggml-common.h__/r ggml/src/ggml-common.h' -e '/__embed_ggml-common.h__/d' < ggml/src/ggml-metal/ggml-metal.metal > ggml/src/ggml-metal/ggml-metal-embed.metal.tmp
+ @sed -e '/#include "ggml-metal-impl.h"/r ggml/src/ggml-metal/ggml-metal-impl.h' -e '/#include "ggml-metal-impl.h"/d' < ggml/src/ggml-metal/ggml-metal-embed.metal.tmp > ggml/src/ggml-metal/ggml-metal-embed.metal
+ $(eval TEMP_ASSEMBLY=$(shell mktemp -d))
+ @echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY)/ggml-metal-embed.s
+ @echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
+ @echo "_ggml_metallib_start:" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
+ @echo ".incbin \"ggml/src/ggml-metal/ggml-metal-embed.metal\"" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
+ @echo ".globl _ggml_metallib_end" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
+ @echo "_ggml_metallib_end:" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
+ $(CC) $(CFLAGS) -c $(TEMP_ASSEMBLY)/ggml-metal-embed.s -o $@
+ @rm -f ${TEMP_ASSEMBLY}/ggml-metal-embed.s
+ @rmdir ${TEMP_ASSEMBLY}
endif
endif # GGML_METAL
OBJ_GGML += \
ggml/src/ggml.o \
- ggml/src/ggml-cpu.o \
+ ggml/src/ggml-aarch64.o \
ggml/src/ggml-alloc.o \
ggml/src/ggml-backend.o \
+ ggml/src/ggml-backend-reg.o \
+ ggml/src/ggml-opt.o \
ggml/src/ggml-quants.o \
- ggml/src/ggml-aarch64.o
+ ggml/src/ggml-threading.o \
+ ggml/src/ggml-cpu/ggml-cpu.o \
+ ggml/src/ggml-cpu/ggml-cpu-cpp.o \
+ ggml/src/ggml-cpu/ggml-cpu-aarch64.o \
+ ggml/src/ggml-cpu/ggml-cpu-quants.o
OBJ_WHISPER += \
src/whisper.o
# Build libraries
#
-# ggml
-
-ggml/src/ggml.o: \
- ggml/src/ggml.c \
- ggml/include/ggml.h
- $(CC) $(CFLAGS) -c $< -o $@
-
-ggml/src/ggml-cpu.o: \
- ggml/src/ggml-cpu.c \
- ggml/include/ggml.h \
- ggml/src/ggml-common.h
- $(CC) $(CFLAGS) -c $< -o $@
-
-ggml/src/ggml-alloc.o: \
- ggml/src/ggml-alloc.c \
- ggml/include/ggml.h \
- ggml/include/ggml-alloc.h
- $(CC) $(CFLAGS) -c $< -o $@
-
-ggml/src/ggml-backend.o: \
- ggml/src/ggml-backend.cpp \
- ggml/include/ggml.h \
- ggml/include/ggml-backend.h
- $(CXX) $(CXXFLAGS) -c $< -o $@
-
-ggml/src/ggml-quants.o: \
- ggml/src/ggml-quants.c \
- ggml/include/ggml.h \
- ggml/src/ggml-quants.h \
- ggml/src/ggml-common.h
- $(CC) $(CFLAGS) -c $< -o $@
-
-ggml/src/ggml-aarch64.o: \
- ggml/src/ggml-aarch64.c \
- ggml/include/ggml.h \
- ggml/src/ggml-aarch64.h \
- ggml/src/ggml-common.h
- $(CC) $(CFLAGS) -c $< -o $@
+LIB_GGML = libggml.so
+LIB_GGML_S = libggml.a
-ggml/src/ggml-blas.o: \
- ggml/src/ggml-blas.cpp \
- ggml/include/ggml-blas.h
- $(CXX) $(CXXFLAGS) -c $< -o $@
+LIB_LLAMA = libllama.so
+LIB_LLAMA_S = libllama.a
-ifdef GGML_LLAMAFILE
-ggml/src/sgemm.o: \
- ggml/src/sgemm.cpp \
- ggml/src/sgemm.h \
- ggml/include/ggml.h
- $(CXX) $(CXXFLAGS) -c $< -o $@
-endif # GGML_LLAMAFILE
+LIB_COMMON = libcommon.so
+LIB_COMMON_S = libcommon.a
-ifdef GGML_RPC
-ggml/src/ggml-rpc.o: \
- ggml/src/ggml-rpc.cpp \
- ggml/include/ggml-rpc.h
- $(CXX) $(CXXFLAGS) -c $< -o $@
-endif # GGML_RPC
+LIB_COMMON_SDL = libcommon-sdl.so
+LIB_COMMON_SDL_S = libcommon-sdl.a
-$(LIB_GGML): \
- $(OBJ_GGML)
- $(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
+# Targets
+BUILD_TARGETS += $(LIB_GGML) $(LIB_GGML_S) $(LIB_LLAMA) $(LIB_LLAMA_S) $(LIB_COMMON) $(LIB_COMMON_S)
-$(LIB_GGML_S): \
- $(OBJ_GGML)
- ar rcs $(LIB_GGML_S) $^
+# Dependency files
+DEP_FILES = $(OBJ_GGML:.o=.d) $(OBJ_LLAMA:.o=.d) $(OBJ_COMMON:.o=.d)
-# whisper
+# Default target
+all: $(BUILD_TARGETS)
-src/whisper.o: \
- src/whisper.cpp \
- include/whisper.h \
+# Note: need this exception because `ggml-cpu.c` and `ggml-cpu.cpp` both produce the same obj/dep files
+# g++ -M -I ./ggml/include/ -I ./ggml/src ggml/src/ggml-cpu/ggml-cpu.cpp | grep ggml
+ggml/src/ggml-cpu/ggml-cpu-cpp.o: \
+ ggml/src/ggml-cpu/ggml-cpu.cpp \
+ ggml/include/ggml-backend.h \
ggml/include/ggml.h \
ggml/include/ggml-alloc.h \
- ggml/include/ggml-backend.h \
- ggml/include/ggml-cuda.h \
- ggml/include/ggml-metal.h
- $(CXX) $(CXXFLAGS) -c $< -o $@
+ ggml/src/ggml-backend-impl.h \
+ ggml/include/ggml-cpu.h \
+ ggml/src/ggml-impl.h
+ $(CXX) $(CXXFLAGS) -c $< -o $@
-$(LIB_WHISPER): \
- $(OBJ_WHISPER) \
- $(LIB_GGML)
- $(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
+# Rules for building object files
+ggml/%.o: ggml/%.c
+ $(CC) $(CFLAGS) -MMD -c $< -o $@
-$(LIB_WHISPER_S): \
- $(OBJ_WHISPER) \
- $(OBJ_GGML)
- ar rcs $(LIB_WHISPER_S) $^
+ggml/%.o: ggml/%.cpp
+ $(CXX) $(CXXFLAGS) -MMD -c $< -o $@
-# common
+src/%.o: src/%.cpp
+ $(CXX) $(CXXFLAGS) -MMD -c $< -o $@
-examples/common.o: \
- examples/common.cpp \
- examples/common.h
- $(CXX) $(CXXFLAGS) -c $< -o $@
+examples/%.o: examples/%.cpp
+ $(CXX) $(CXXFLAGS) -MMD -c $< -o $@
-examples/common-ggml.o: \
- examples/common-ggml.cpp \
- examples/common-ggml.h
- $(CXX) $(CXXFLAGS) -c $< -o $@
+# Rules for building libraries
+$(LIB_GGML): $(OBJ_GGML)
+ $(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
+
+$(LIB_GGML_S): $(OBJ_GGML)
+ ar rcs $(LIB_GGML_S) $^
-$(LIB_COMMON): \
- $(OBJ_COMMON)
+$(LIB_LLAMA): $(OBJ_LLAMA) $(LIB_GGML)
$(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
-$(LIB_COMMON_S): \
- $(OBJ_COMMON)
- ar rcs $(LIB_COMMON_S) $^
+$(LIB_LLAMA_S): $(OBJ_LLAMA)
+ ar rcs $(LIB_LLAMA_S) $^
# common-sdl
examples/common-sdl.h
$(CXX) $(CXXFLAGS) $(CFLAGS_SDL) -c $< -o $@
-$(LIB_COMMON_SDL): \
- $(OBJ_SDL)
- $(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS) $(LDFLAGS_SDL)
+$(LIB_COMMON): $(OBJ_COMMON) $(LIB_LLAMA) $(LIB_GGML)
+ $(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
+
+$(LIB_COMMON_S): $(OBJ_COMMON)
+ ar rcs $(LIB_COMMON_S) $^
-$(LIB_COMMON_SDL_S): \
- $(OBJ_SDL)
- ar rcs $(LIB_COMMON_SDL_S) $^
+# Include dependency files
+-include $(DEP_FILES)
+# Clean rule
clean:
- rm -vrf *.dot $(BUILD_TARGETS) $(TEST_TARGETS)
- rm -rvf src/*.o
- rm -rvf src/coreml/*.o
- rm -rvf tests/*.o
- rm -rvf examples/*.o
- rm -rvf *.a
- rm -rvf *.dll
- rm -rvf *.so
- rm -rvf *.dot
- rm -rvf ggml/*.a
- rm -rvf ggml/*.dll
- rm -rvf ggml/*.so
- rm -vrf ggml/src/*.o
- rm -vrf ggml/src/ggml-metal-embed.metal
- rm -vrf ggml/src/ggml-cuda/*.o
- rm -vrf ggml/src/ggml-cuda/template-instances/*.o
- rm -rvf $(BUILD_TARGETS)
- rm -rvf $(TEST_TARGETS)
- find examples -type f -name "*.o" -delete
+ rm -vrf $(BUILD_TARGETS) $(TEST_TARGETS)
+ rm -rvf *.a *.dll *.so *.dot
+ find ggml src tests examples -type f -name "*.o" -delete
+ find ggml src tests examples -type f -name "*.d" -delete
#
# Examples
"tests",
"CMakeLists.txt",
"Makefile",
- "ggml/src/ggml-metal-embed.metal"
+ "ggml/src/ggml-metal/ggml-metal-embed.metal"
],
sources: [
"ggml/src/ggml.c",
"ggml/src/ggml-aarch64.c",
"ggml/src/ggml-alloc.c",
"ggml/src/ggml-backend.cpp",
- "ggml/src/ggml-cpu.c",
+ "ggml/src/ggml-backend-reg.cpp",
+ "ggml/src/ggml-cpu/ggml-cpu.c",
+ "ggml/src/ggml-cpu/ggml-cpu.cpp",
+ "ggml/src/ggml-cpu/ggml-cpu-aarch64.c",
+ "ggml/src/ggml-cpu/ggml-cpu-quants.c",
"ggml/src/ggml-quants.c",
- "ggml/src/ggml-metal.m"
+ "ggml/src/ggml-threading.cpp",
+ "ggml/src/ggml-metal/ggml-metal.m"
],
- resources: [.process("ggml/src/ggml-metal.metal")],
+ resources: [.process("ggml/src/ggml-metal/ggml-metal.metal")],
publicHeadersPath: "spm-headers",
cSettings: [
.unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]),
- .define("GGML_USE_ACCELERATE"),
.unsafeFlags(["-fno-objc-arc"]),
+ .headerSearchPath("ggml/src"),
+ .define("GGML_USE_ACCELERATE"),
.define("GGML_USE_METAL")
// NOTE: NEW_LAPACK will required iOS version 16.4+
// We should consider add this in the future when we drop support for iOS 14
SOURCE_FILES
${SOURCE_FILES}
${WHISPER_LIB_DIR}/ggml/src/ggml.c
- ${WHISPER_LIB_DIR}/ggml/src/ggml-cpu.c
${WHISPER_LIB_DIR}/ggml/src/ggml-aarch64.c
${WHISPER_LIB_DIR}/ggml/src/ggml-alloc.c
${WHISPER_LIB_DIR}/ggml/src/ggml-backend.cpp
+ ${WHISPER_LIB_DIR}/ggml/src/ggml-backend-reg.cpp
${WHISPER_LIB_DIR}/ggml/src/ggml-quants.c
+ ${WHISPER_LIB_DIR}/ggml/src/ggml-threading.cpp
+ ${WHISPER_LIB_DIR}/ggml/src/ggml-cpu/ggml-cpu.c
+ ${WHISPER_LIB_DIR}/ggml/src/ggml-cpu/ggml-cpu.cpp
+ ${WHISPER_LIB_DIR}/ggml/src/ggml-cpu/ggml-cpu-aarch64.c
+ ${WHISPER_LIB_DIR}/ggml/src/ggml-cpu/ggml-cpu-quants.c
)
endif()
18ABE15A2AF556340044A204 /* ggml-backend.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE1572AF556340044A204 /* ggml-backend.cpp */; };
18ABE15B2AF556340044A204 /* ggml-quants.c in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE1592AF556340044A204 /* ggml-quants.c */; };
18E864A92CE73C1E0094B8B3 /* ggml-cpu.c in Sources */ = {isa = PBXBuildFile; fileRef = 18E864A82CE73C1E0094B8B3 /* ggml-cpu.c */; };
+ 18F8C0BC2CEDF4DC00CAD607 /* ggml-threading.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 18F8C0BB2CEDF4DC00CAD607 /* ggml-threading.cpp */; };
+ 18F8C0BE2CEDF50700CAD607 /* ggml-cpu.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 18F8C0BD2CEDF50700CAD607 /* ggml-cpu.cpp */; };
+ 18F8C0C42CEDF52700CAD607 /* ggml-cpu-aarch64.c in Sources */ = {isa = PBXBuildFile; fileRef = 18F8C0C02CEDF52700CAD607 /* ggml-cpu-aarch64.c */; };
+ 18F8C0C52CEDF52700CAD607 /* ggml-cpu-quants.c in Sources */ = {isa = PBXBuildFile; fileRef = 18F8C0C32CEDF52700CAD607 /* ggml-cpu-quants.c */; };
+ 18F8C0C72CEDF7AB00CAD607 /* ggml-backend-reg.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 18F8C0C62CEDF7AB00CAD607 /* ggml-backend-reg.cpp */; };
7FE3424B2A0C3FA20015A058 /* whisper-encoder-impl.m in Sources */ = {isa = PBXBuildFile; fileRef = 7FE342452A0C3FA20015A058 /* whisper-encoder-impl.m */; };
7FE3424C2A0C3FA20015A058 /* whisper-encoder.mm in Sources */ = {isa = PBXBuildFile; fileRef = 7FE342472A0C3FA20015A058 /* whisper-encoder.mm */; };
7FE3424D2A0C3FA20015A058 /* whisper-decoder-impl.m in Sources */ = {isa = PBXBuildFile; fileRef = 7FE3424A2A0C3FA20015A058 /* whisper-decoder-impl.m */; };
18133C7F2C64E342005CEAAC /* ggml-aarch64.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-aarch64.c"; path = "../../../ggml/src/ggml-aarch64.c"; sourceTree = "<group>"; };
184447182AB211A2007D6BFE /* ggml-alloc.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-alloc.c"; path = "../../../ggml/src/ggml-alloc.c"; sourceTree = "<group>"; };
184447192AB211A2007D6BFE /* ggml-alloc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-alloc.h"; path = "../../../ggml/include/ggml-alloc.h"; sourceTree = "<group>"; };
- 1844471B2AB21655007D6BFE /* ggml-metal.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = "ggml-metal.m"; path = "../../../ggml/src/ggml-metal.m"; sourceTree = "<group>"; };
- 1844471D2AB2195F007D6BFE /* ggml-metal.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; name = "ggml-metal.metal"; path = "../../../ggml/src/ggml-metal.metal"; sourceTree = "<group>"; };
+ 1844471B2AB21655007D6BFE /* ggml-metal.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = "ggml-metal.m"; path = "../../../ggml/src/ggml-metal/ggml-metal.m"; sourceTree = "<group>"; };
+ 1844471D2AB2195F007D6BFE /* ggml-metal.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; name = "ggml-metal.metal"; path = "../../../ggml/src/ggml-metal/ggml-metal.metal"; sourceTree = "<group>"; };
18627C7629052BDF00BD2A04 /* whisper.objc.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = whisper.objc.app; sourceTree = BUILT_PRODUCTS_DIR; };
18627C7929052BDF00BD2A04 /* AppDelegate.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = AppDelegate.h; sourceTree = "<group>"; };
18627C7A29052BDF00BD2A04 /* AppDelegate.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = AppDelegate.m; sourceTree = "<group>"; };
18ABE1572AF556340044A204 /* ggml-backend.cpp */ = {isa = PBXFileReference; explicitFileType = sourcecode.cpp.cpp; fileEncoding = 4; name = "ggml-backend.cpp"; path = "../../../ggml/src/ggml-backend.cpp"; sourceTree = "<group>"; };
18ABE1582AF556340044A204 /* ggml-impl.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-impl.h"; path = "../../../ggml/src/ggml-impl.h"; sourceTree = "<group>"; };
18ABE1592AF556340044A204 /* ggml-quants.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-quants.c"; path = "../../../ggml/src/ggml-quants.c"; sourceTree = "<group>"; };
- 18E864A82CE73C1E0094B8B3 /* ggml-cpu.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; name = "ggml-cpu.c"; path = "../../../ggml/src/ggml-cpu.c"; sourceTree = "<group>"; };
+ 18E864A82CE73C1E0094B8B3 /* ggml-cpu.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; name = "ggml-cpu.c"; path = "../../../ggml/src/ggml-cpu/ggml-cpu.c"; sourceTree = "<group>"; };
18E864AA2CE73C580094B8B3 /* ggml-cpu.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = "ggml-cpu.h"; path = "../../../ggml/include/ggml-cpu.h"; sourceTree = "<group>"; };
+ 18F8C0BA2CEDF4DC00CAD607 /* ggml-threading.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = "ggml-threading.h"; path = "../../../ggml/src/ggml-threading.h"; sourceTree = "<group>"; };
+ 18F8C0BB2CEDF4DC00CAD607 /* ggml-threading.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = "ggml-threading.cpp"; path = "../../../ggml/src/ggml-threading.cpp"; sourceTree = "<group>"; };
+ 18F8C0BD2CEDF50700CAD607 /* ggml-cpu.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = "ggml-cpu.cpp"; path = "../../../ggml/src/ggml-cpu/ggml-cpu.cpp"; sourceTree = "<group>"; };
+ 18F8C0BF2CEDF52700CAD607 /* ggml-cpu-aarch64.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = "ggml-cpu-aarch64.h"; path = "../../../ggml/src/ggml-cpu/ggml-cpu-aarch64.h"; sourceTree = "<group>"; };
+ 18F8C0C02CEDF52700CAD607 /* ggml-cpu-aarch64.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; name = "ggml-cpu-aarch64.c"; path = "../../../ggml/src/ggml-cpu/ggml-cpu-aarch64.c"; sourceTree = "<group>"; };
+ 18F8C0C12CEDF52700CAD607 /* ggml-cpu-impl.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = "ggml-cpu-impl.h"; path = "../../../ggml/src/ggml-cpu/ggml-cpu-impl.h"; sourceTree = "<group>"; };
+ 18F8C0C22CEDF52700CAD607 /* ggml-cpu-quants.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = "ggml-cpu-quants.h"; path = "../../../ggml/src/ggml-cpu/ggml-cpu-quants.h"; sourceTree = "<group>"; };
+ 18F8C0C32CEDF52700CAD607 /* ggml-cpu-quants.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; name = "ggml-cpu-quants.c"; path = "../../../ggml/src/ggml-cpu/ggml-cpu-quants.c"; sourceTree = "<group>"; };
+ 18F8C0C62CEDF7AB00CAD607 /* ggml-backend-reg.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = "ggml-backend-reg.cpp"; path = "../../../ggml/src/ggml-backend-reg.cpp"; sourceTree = "<group>"; };
7FE342452A0C3FA20015A058 /* whisper-encoder-impl.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = "whisper-encoder-impl.m"; sourceTree = "<group>"; };
7FE342462A0C3FA20015A058 /* whisper-encoder.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "whisper-encoder.h"; sourceTree = "<group>"; };
7FE342472A0C3FA20015A058 /* whisper-encoder.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = "whisper-encoder.mm"; sourceTree = "<group>"; };
18627C7829052BDF00BD2A04 /* whisper.objc */ = {
isa = PBXGroup;
children = (
+ 18F8C0C62CEDF7AB00CAD607 /* ggml-backend-reg.cpp */,
+ 18F8C0BF2CEDF52700CAD607 /* ggml-cpu-aarch64.h */,
+ 18F8C0C02CEDF52700CAD607 /* ggml-cpu-aarch64.c */,
+ 18F8C0C12CEDF52700CAD607 /* ggml-cpu-impl.h */,
+ 18F8C0C22CEDF52700CAD607 /* ggml-cpu-quants.h */,
+ 18F8C0C32CEDF52700CAD607 /* ggml-cpu-quants.c */,
+ 18F8C0BD2CEDF50700CAD607 /* ggml-cpu.cpp */,
+ 18F8C0BA2CEDF4DC00CAD607 /* ggml-threading.h */,
+ 18F8C0BB2CEDF4DC00CAD607 /* ggml-threading.cpp */,
18E864AA2CE73C580094B8B3 /* ggml-cpu.h */,
18E864A82CE73C1E0094B8B3 /* ggml-cpu.c */,
18133C7F2C64E342005CEAAC /* ggml-aarch64.c */,
18627C9629052C5800BD2A04 /* ggml.c in Sources */,
18627C7B29052BDF00BD2A04 /* AppDelegate.m in Sources */,
7FE3424D2A0C3FA20015A058 /* whisper-decoder-impl.m in Sources */,
+ 18F8C0C72CEDF7AB00CAD607 /* ggml-backend-reg.cpp in Sources */,
+ 18F8C0BE2CEDF50700CAD607 /* ggml-cpu.cpp in Sources */,
1844471A2AB211A2007D6BFE /* ggml-alloc.c in Sources */,
+ 18F8C0C42CEDF52700CAD607 /* ggml-cpu-aarch64.c in Sources */,
+ 18F8C0C52CEDF52700CAD607 /* ggml-cpu-quants.c in Sources */,
18E864A92CE73C1E0094B8B3 /* ggml-cpu.c in Sources */,
18ABE15A2AF556340044A204 /* ggml-backend.cpp in Sources */,
18627C8C29052BE000BD2A04 /* main.m in Sources */,
18627C7E29052BDF00BD2A04 /* SceneDelegate.m in Sources */,
+ 18F8C0BC2CEDF4DC00CAD607 /* ggml-threading.cpp in Sources */,
1844471C2AB21655007D6BFE /* ggml-metal.m in Sources */,
7FE3424B2A0C3FA20015A058 /* whisper-encoder-impl.m in Sources */,
);
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
GCC_WARN_UNUSED_FUNCTION = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
+ HEADER_SEARCH_PATHS = "";
IPHONEOS_DEPLOYMENT_TARGET = 16.0;
MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
MTL_FAST_MATH = YES;
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
GCC_WARN_UNUSED_FUNCTION = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
+ HEADER_SEARCH_PATHS = "";
IPHONEOS_DEPLOYMENT_TARGET = 16.0;
MTL_ENABLE_DEBUG_INFO = NO;
MTL_FAST_MATH = YES;
DEVELOPMENT_TEAM = P8JZH34X63;
GCC_WARN_64_TO_32_BIT_CONVERSION = NO;
GENERATE_INFOPLIST_FILE = YES;
+ HEADER_SEARCH_PATHS = ../../../ggml/src/;
INFOPLIST_FILE = whisper.objc/Info.plist;
INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
INFOPLIST_KEY_UILaunchStoryboardName = LaunchScreen;
DEVELOPMENT_TEAM = P8JZH34X63;
GCC_WARN_64_TO_32_BIT_CONVERSION = NO;
GENERATE_INFOPLIST_FILE = YES;
+ HEADER_SEARCH_PATHS = ../../../ggml/src/;
INFOPLIST_FILE = whisper.objc/Info.plist;
INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
INFOPLIST_KEY_UILaunchStoryboardName = LaunchScreen;
private func systemInfo() -> String {
var info = ""
if (ggml_cpu_has_neon() != 0) { info += "NEON " }
- if (ggml_cpu_has_metal() != 0) { info += "METAL " }
- if (ggml_cpu_has_blas() != 0) { info += "BLAS " }
return String(info.dropLast())
}
+++ /dev/null
-#!/usr/bin/env python
-
-import logging
-import argparse
-import asyncio
-import os
-from tempfile import gettempdir
-
-logger = logging.getLogger("ggml-vk-generate-shaders")
-
-GLSLC = "glslc"
-
-type_names = [
- "f32",
- "f16",
- "q4_0",
- "q4_1",
- "q5_0",
- "q5_1",
- "q8_0",
- "q2_k",
- "q3_k",
- "q4_k",
- "q5_k",
- "q6_k",
-]
-
-ASYNCIO_CONCURRENCY = 64
-
-input_dir = "vulkan-shaders"
-output_dir = gettempdir()
-
-lock = asyncio.Lock()
-shader_fnames = []
-
-
-async def string_to_spv(name, in_fname, defines, fp16=True):
- name = f"{name}{'_fp32' if not fp16 else ''}"
- out_fname = os.path.join(output_dir, f"{name}.spv")
-
- in_path = os.path.join(input_dir, in_fname)
-
- cmd = [GLSLC, "-fshader-stage=compute", "--target-env=vulkan1.2", "-O", in_path, "-o", out_fname]
-
- cmd.extend([f"-D{key}={value}" for key, value in defines.items()])
-
- proc = await asyncio.create_subprocess_exec(*cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE)
-
- stdout, stderr = await proc.communicate()
-
- stdout = stdout.decode()
- error = stderr.decode()
-
- if proc.returncode:
- cmd = " ".join(cmd)
- logger.error(f"cannot compile {name}\n\n{cmd}\n\n{error}")
- return
-
- async with lock:
- shader_fnames.append((name, out_fname))
-
-
-def matmul_shaders(tasks, fp16, matmul_id):
- if fp16:
- load_vec = "8"
- aligned_b_type_f32 = "mat2x4"
- aligned_b_type_f16 = "f16mat2x4"
- else:
- load_vec = "4"
- aligned_b_type_f32 = "vec4"
- aligned_b_type_f16 = "f16vec4"
-
- base_dict = {"FLOAT_TYPE": "float" if not fp16 else "float16_t"}
- shader_name = "matmul"
-
- if matmul_id:
- base_dict["MUL_MAT_ID"] = "1"
- shader_name = "matmul_id"
-
- if fp16:
- base_dict["FLOAT16"] = "1"
-
- # Shaders with f16 B_TYPE
- tasks.append(string_to_spv(f"{shader_name}_f32_f16", "mul_mm.comp", base_dict | {"DATA_A_F32": "1", "B_TYPE": "float16_t", "D_TYPE": "float"}, fp16))
- tasks.append(string_to_spv(f"{shader_name}_f32_f16_aligned", "mul_mm.comp", base_dict | {"DATA_A_F32": "1", "LOAD_VEC_A": load_vec, "LOAD_VEC_B": load_vec, "B_TYPE": aligned_b_type_f16, "D_TYPE": "float"}, fp16))
-
- tasks.append(string_to_spv(f"{shader_name}_f16", "mul_mm.comp", base_dict | {"DATA_A_F16": "1", "B_TYPE": "float16_t", "D_TYPE": "float"}, fp16))
- tasks.append(string_to_spv(f"{shader_name}_f16_aligned", "mul_mm.comp", base_dict | {"DATA_A_F16": "1", "LOAD_VEC_A": load_vec, "LOAD_VEC_B": load_vec, "B_TYPE": aligned_b_type_f16, "D_TYPE": "float"}, fp16))
-
- for tname in type_names:
- data_a_key = f"DATA_A_{tname.upper()}"
- load_vec_a = load_vec if tname in ("f32", "f16") else "2"
- tasks.append(string_to_spv(f"{shader_name}_{tname}_f32", "mul_mm.comp", base_dict | {data_a_key: "1", "B_TYPE": "float", "D_TYPE": "float"}, fp16))
- tasks.append(string_to_spv(f"{shader_name}_{tname}_f32_aligned", "mul_mm.comp", base_dict | {data_a_key: "2", "LOAD_VEC_A": load_vec_a, "LOAD_VEC_B": load_vec, "B_TYPE": aligned_b_type_f32, "D_TYPE": "float"}, fp16))
-
-
-async def main():
- logger.info("ggml_vulkan: Generating and compiling shaders to SPIR-V")
-
- tasks = []
-
- for fp16 in (False, True):
- # MUL_MAT
- matmul_shaders(tasks, fp16, False)
- # MUL_MAT_ID
- matmul_shaders(tasks, fp16, True)
-
- for tname in type_names:
- base_dict = {"FLOAT_TYPE": "float"}
-
- # mul mat vec
- data_a_key = f"DATA_A_{tname.upper()}"
- shader = f"mul_mat_vec_{tname}.comp" if tname.endswith("_k") else "mul_mat_vec.comp"
-
- tasks.append(string_to_spv(f"mul_mat_vec_{tname}_f32_f32", shader, base_dict | {data_a_key: "1", "B_TYPE": "float", "D_TYPE": "float"}))
- tasks.append(string_to_spv(f"mul_mat_vec_{tname}_f16_f32", shader, base_dict | {data_a_key: "1", "B_TYPE": "float16_t", "D_TYPE": "float"}))
-
- tasks.append(string_to_spv(f"mul_mat_vec_id_{tname}_f32", shader, base_dict | {"MUL_MAT_ID": "1", data_a_key: "1", "B_TYPE": "float", "D_TYPE": "float"}))
-
- # Dequant shaders
- if tname != "f16":
- tasks.append(string_to_spv(f"dequant_{tname}", f"dequant_{tname}.comp", base_dict | {data_a_key: "1", "D_TYPE": "float16_t"}))
-
- # get_rows
- if not tname.endswith("_k"):
- shader = "get_rows.comp" if tname in ("f32", "f16") else "get_rows_quant.comp"
-
- if tname == "f16":
- tasks.append(string_to_spv(f"get_rows_{tname}", shader, {data_a_key: "1", "B_TYPE": "int", "D_TYPE": "float16_t", "OPTIMIZATION_ERROR_WORKAROUND": "1"}))
- else:
- tasks.append(string_to_spv(f"get_rows_{tname}", shader, {data_a_key: "1", "B_TYPE": "int", "D_TYPE": "float16_t"}))
- tasks.append(string_to_spv(f"get_rows_{tname}_f32", shader, {data_a_key: "1", "B_TYPE": "int", "D_TYPE": "float"}))
-
- tasks.append(string_to_spv("mul_mat_vec_p021_f16_f32", "mul_mat_vec_p021.comp", {"A_TYPE": "float16_t", "B_TYPE": "float", "D_TYPE": "float"}))
- tasks.append(string_to_spv("mul_mat_vec_nc_f16_f32", "mul_mat_vec_nc.comp", {"A_TYPE": "float16_t", "B_TYPE": "float", "D_TYPE": "float"}))
-
- # Norms
- tasks.append(string_to_spv("norm_f32", "norm.comp", base_dict | {"A_TYPE": "float", "D_TYPE": "float"}))
- tasks.append(string_to_spv("rms_norm_f32", "rms_norm.comp", base_dict | {"A_TYPE": "float", "D_TYPE": "float"}))
-
- tasks.append(string_to_spv("cpy_f32_f32", "copy.comp", {"A_TYPE": "float", "D_TYPE": "float"}))
- tasks.append(string_to_spv("cpy_f32_f16", "copy.comp", {"A_TYPE": "float", "D_TYPE": "float16_t"}))
- tasks.append(string_to_spv("cpy_f16_f16", "copy.comp", {"A_TYPE": "float16_t", "D_TYPE": "float16_t", "OPTIMIZATION_ERROR_WORKAROUND": "1"}))
-
- tasks.append(string_to_spv("add_f32", "add.comp", {"A_TYPE": "float", "B_TYPE": "float", "D_TYPE": "float", "FLOAT_TYPE": "float"}))
-
- tasks.append(string_to_spv("split_k_reduce", "mul_mat_split_k_reduce.comp", {}))
-
- tasks.append(string_to_spv("mul_f32", "mul.comp", {"A_TYPE": "float", "B_TYPE": "float", "D_TYPE": "float", "FLOAT_TYPE": "float"}))
-
- tasks.append(string_to_spv("div_f32", "div.comp", {"A_TYPE": "float", "B_TYPE": "float", "D_TYPE": "float", "FLOAT_TYPE": "float"}))
-
- tasks.append(string_to_spv("scale_f32", "scale.comp", {"A_TYPE": "float", "D_TYPE": "float", "FLOAT_TYPE": "float"}))
-
- tasks.append(string_to_spv("sqr_f32", "square.comp", {"A_TYPE": "float", "D_TYPE": "float", "FLOAT_TYPE": "float"}))
-
- tasks.append(string_to_spv("clamp_f32", "clamp.comp", {"A_TYPE": "float", "D_TYPE": "float", "FLOAT_TYPE": "float"}))
-
- tasks.append(string_to_spv("gelu_f32", "gelu.comp", {"A_TYPE": "float", "D_TYPE": "float"}))
- tasks.append(string_to_spv("silu_f32", "silu.comp", {"A_TYPE": "float", "D_TYPE": "float"}))
- tasks.append(string_to_spv("relu_f32", "relu.comp", {"A_TYPE": "float", "D_TYPE": "float"}))
-
- tasks.append(string_to_spv("diag_mask_inf_f32", "diag_mask_inf.comp", {"A_TYPE": "float", "D_TYPE": "float"}))
-
- tasks.append(string_to_spv("soft_max_f32", "soft_max.comp", base_dict | {"A_TYPE": "float", "B_TYPE": "float", "D_TYPE": "float"}))
- tasks.append(string_to_spv("soft_max_f32_f16", "soft_max.comp", base_dict | {"A_TYPE": "float", "B_TYPE": "float16_t", "D_TYPE": "float"}))
-
- tasks.append(string_to_spv("rope_norm_f32", "rope_norm.comp", {"A_TYPE": "float", "D_TYPE": "float"}))
- tasks.append(string_to_spv("rope_norm_f16", "rope_norm.comp", {"A_TYPE": "float16_t", "D_TYPE": "float16_t"}))
-
- tasks.append(string_to_spv("rope_neox_f32", "rope_neox.comp", {"A_TYPE": "float", "D_TYPE": "float"}))
- tasks.append(string_to_spv("rope_neox_f16", "rope_neox.comp", {"A_TYPE": "float16_t", "D_TYPE": "float16_t"}))
-
- tasks.append(string_to_spv("argsort_f32", "argsort.comp", {"A_TYPE": "float"}))
-
- tasks.append(string_to_spv("sum_rows_f32", "sum_rows.comp", base_dict | {"A_TYPE": "float", "D_TYPE": "float"}))
-
- # Helper to decorate tasks with semaphore acquisition.
- async def withSemaphore(sem, task):
- async with sem:
- return await task
-
- # Run tasks concurrently guarded by a concurrency limit.
- sem = asyncio.Semaphore(ASYNCIO_CONCURRENCY)
- await asyncio.gather(*(withSemaphore(sem, task) for task in tasks))
-
- with open("ggml-vulkan-shaders.hpp", "w") as f:
- f.write("#include <cstdint>\n\n")
- for name, path in sorted(shader_fnames):
-
- with open(path, "rb") as spv:
- counter = 0
- newline_counter = 0
- f.write(f"unsigned char {name}_data[] = {{\n")
- for val in spv.read():
- f.write(f"0x{val:02x},")
- newline_counter += 1
- counter += 1
- if newline_counter >= 12:
- newline_counter = 0
- f.write("\n")
- f.write("\n};\n")
- f.write(f"const uint64_t {name}_len = {counter};\n\n")
- os.remove(path)
-
-
-if __name__ == "__main__":
- parser = argparse.ArgumentParser(description="GGML Vulkan Shader Generator")
-
- parser.add_argument("--glslc", help="Path to glslc")
- parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
-
- args = parser.parse_args()
-
- logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
-
- if args.glslc:
- GLSLC = args.glslc
-
- asyncio.run(main())
+++ /dev/null
-#pragma once
-
-// GGML CPU internal header
-
-#include "ggml.h"
-#include "ggml-impl.h"
-#include <stdlib.h> // load `stdlib.h` before other headers to work around MinGW bug: https://sourceforge.net/p/mingw-w64/bugs/192/
-//#include <stddef.h>
-#include <stdbool.h>
-#include <string.h> // memcpy
-#include <math.h> // fabsf
-
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#if defined(_MSC_VER)
-
-#define m512bh(p) p
-#define m512i(p) p
-
-#else
-
-#define m512bh(p) (__m512bh)(p)
-#define m512i(p) (__m512i)(p)
-
-#endif
-
-/**
- * Converts brain16 to float32.
- *
- * The bfloat16 floating point format has the following structure:
- *
- * ┌sign
- * │
- * │ ┌exponent
- * │ │
- * │ │ ┌mantissa
- * │ │ │
- * │┌──┴───┐┌─┴───┐
- * 0b0000000000000000 brain16
- *
- * Since bf16 has the same number of exponent bits as a 32bit float,
- * encoding and decoding numbers becomes relatively straightforward.
- *
- * ┌sign
- * │
- * │ ┌exponent
- * │ │
- * │ │ ┌mantissa
- * │ │ │
- * │┌──┴───┐┌─┴───────────────────┐
- * 0b00000000000000000000000000000000 IEEE binary32
- *
- * For comparison, the standard fp16 format has fewer exponent bits.
- *
- * ┌sign
- * │
- * │ ┌exponent
- * │ │
- * │ │ ┌mantissa
- * │ │ │
- * │┌─┴─┐┌─┴──────┐
- * 0b0000000000000000 IEEE binary16
- *
- * @see IEEE 754-2008
- */
-static inline float ggml_compute_bf16_to_fp32(ggml_bf16_t h) {
- union {
- float f;
- uint32_t i;
- } u;
- u.i = (uint32_t)h.bits << 16;
- return u.f;
-}
-
-/**
- * Converts float32 to brain16.
- *
- * This is binary identical with Google Brain float conversion.
- * Floats shall round to nearest even, and NANs shall be quiet.
- * Subnormals aren't flushed to zero, except perhaps when used.
- * This code should vectorize nicely if using modern compilers.
- */
-static inline ggml_bf16_t ggml_compute_fp32_to_bf16(float s) {
- ggml_bf16_t h;
- union {
- float f;
- uint32_t i;
- } u;
- u.f = s;
- if ((u.i & 0x7fffffff) > 0x7f800000) { /* nan */
- h.bits = (u.i >> 16) | 64; /* force to quiet */
- return h;
- }
- h.bits = (u.i + (0x7fff + ((u.i >> 16) & 1))) >> 16;
- return h;
-}
-
-#define GGML_FP32_TO_BF16(x) ggml_compute_fp32_to_bf16(x)
-#define GGML_BF16_TO_FP32(x) ggml_compute_bf16_to_fp32(x)
-
-// __FMA__ and __F16C__ are not defined in MSVC, however they are implied with AVX2/AVX512
-#if defined(_MSC_VER) && (defined(__AVX2__) || defined(__AVX512F__))
-#ifndef __FMA__
-#define __FMA__
-#endif
-#ifndef __F16C__
-#define __F16C__
-#endif
-#endif
-
-// __SSE3__ and __SSSE3__ are not defined in MSVC, but SSE3/SSSE3 are present when AVX/AVX2/AVX512 are available
-#if defined(_MSC_VER) && (defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__))
-#ifndef __SSE3__
-#define __SSE3__
-#endif
-#ifndef __SSSE3__
-#define __SSSE3__
-#endif
-#endif
-
-#if defined(__ARM_FEATURE_SVE)
-#include <arm_sve.h>
-#include <sys/prctl.h>
-#endif
-
-// 16-bit float
-// on Arm, we use __fp16
-// on x86, we use uint16_t
-#if defined(__ARM_NEON)
-
-// if YCM cannot find <arm_neon.h>, make a symbolic link to it, for example:
-//
-// $ ln -sfn /Library/Developer/CommandLineTools/usr/lib/clang/13.1.6/include/arm_neon.h ./src/
-//
-#include <arm_neon.h>
-
-#ifdef _MSC_VER
-
-typedef uint16_t ggml_fp16_internal_t;
-
-#define ggml_vld1q_u32(w,x,y,z) { ((w) + ((uint64_t)(x) << 32)), ((y) + ((uint64_t)(z) << 32)) }
-
-#else
-
-typedef __fp16 ggml_fp16_internal_t;
-
-#define ggml_vld1q_u32(w,x,y,z) { (w), (x), (y), (z) }
-
-#endif // _MSC_VER
-
-#if !defined(__aarch64__)
-
-// 32-bit ARM compatibility
-
-// vaddlvq_s16
-// vpaddq_s16
-// vpaddq_s32
-// vaddvq_s32
-// vaddvq_f32
-// vmaxvq_f32
-// vcvtnq_s32_f32
-// vzip1_u8
-// vzip2_u8
-
-inline static int32_t vaddlvq_s16(int16x8_t v) {
- int32x4_t v0 = vreinterpretq_s32_s64(vpaddlq_s32(vpaddlq_s16(v)));
- return vgetq_lane_s32(v0, 0) + vgetq_lane_s32(v0, 2);
-}
-
-inline static int16x8_t vpaddq_s16(int16x8_t a, int16x8_t b) {
- int16x4_t a0 = vpadd_s16(vget_low_s16(a), vget_high_s16(a));
- int16x4_t b0 = vpadd_s16(vget_low_s16(b), vget_high_s16(b));
- return vcombine_s16(a0, b0);
-}
-
-inline static int32x4_t vpaddq_s32(int32x4_t a, int32x4_t b) {
- int32x2_t a0 = vpadd_s32(vget_low_s32(a), vget_high_s32(a));
- int32x2_t b0 = vpadd_s32(vget_low_s32(b), vget_high_s32(b));
- return vcombine_s32(a0, b0);
-}
-
-inline static int32_t vaddvq_s32(int32x4_t v) {
- return vgetq_lane_s32(v, 0) + vgetq_lane_s32(v, 1) + vgetq_lane_s32(v, 2) + vgetq_lane_s32(v, 3);
-}
-
-inline static float vaddvq_f32(float32x4_t v) {
- return vgetq_lane_f32(v, 0) + vgetq_lane_f32(v, 1) + vgetq_lane_f32(v, 2) + vgetq_lane_f32(v, 3);
-}
-
-inline static float vmaxvq_f32(float32x4_t v) {
- return
- MAX(MAX(vgetq_lane_f32(v, 0), vgetq_lane_f32(v, 1)),
- MAX(vgetq_lane_f32(v, 2), vgetq_lane_f32(v, 3)));
-}
-
-inline static int32x4_t vcvtnq_s32_f32(float32x4_t v) {
- int32x4_t res;
-
- res[0] = roundf(vgetq_lane_f32(v, 0));
- res[1] = roundf(vgetq_lane_f32(v, 1));
- res[2] = roundf(vgetq_lane_f32(v, 2));
- res[3] = roundf(vgetq_lane_f32(v, 3));
-
- return res;
-}
-
-inline static uint8x8_t vzip1_u8(uint8x8_t a, uint8x8_t b) {
- uint8x8_t res;
-
- res[0] = a[0]; res[1] = b[0];
- res[2] = a[1]; res[3] = b[1];
- res[4] = a[2]; res[5] = b[2];
- res[6] = a[3]; res[7] = b[3];
-
- return res;
-}
-
-inline static uint8x8_t vzip2_u8(uint8x8_t a, uint8x8_t b) {
- uint8x8_t res;
-
- res[0] = a[4]; res[1] = b[4];
- res[2] = a[5]; res[3] = b[5];
- res[4] = a[6]; res[5] = b[6];
- res[6] = a[7]; res[7] = b[7];
-
- return res;
-}
-
-// vld1q_s16_x2
-// vld1q_u8_x2
-// vld1q_u8_x4
-// vld1q_s8_x2
-// vld1q_s8_x4
-// TODO: double-check these work correctly
-
-typedef struct ggml_int16x8x2_t {
- int16x8_t val[2];
-} ggml_int16x8x2_t;
-
-inline static ggml_int16x8x2_t ggml_vld1q_s16_x2(const int16_t * ptr) {
- ggml_int16x8x2_t res;
-
- res.val[0] = vld1q_s16(ptr + 0);
- res.val[1] = vld1q_s16(ptr + 8);
-
- return res;
-}
-
-typedef struct ggml_uint8x16x2_t {
- uint8x16_t val[2];
-} ggml_uint8x16x2_t;
-
-inline static ggml_uint8x16x2_t ggml_vld1q_u8_x2(const uint8_t * ptr) {
- ggml_uint8x16x2_t res;
-
- res.val[0] = vld1q_u8(ptr + 0);
- res.val[1] = vld1q_u8(ptr + 16);
-
- return res;
-}
-
-typedef struct ggml_uint8x16x4_t {
- uint8x16_t val[4];
-} ggml_uint8x16x4_t;
-
-inline static ggml_uint8x16x4_t ggml_vld1q_u8_x4(const uint8_t * ptr) {
- ggml_uint8x16x4_t res;
-
- res.val[0] = vld1q_u8(ptr + 0);
- res.val[1] = vld1q_u8(ptr + 16);
- res.val[2] = vld1q_u8(ptr + 32);
- res.val[3] = vld1q_u8(ptr + 48);
-
- return res;
-}
-
-typedef struct ggml_int8x16x2_t {
- int8x16_t val[2];
-} ggml_int8x16x2_t;
-
-inline static ggml_int8x16x2_t ggml_vld1q_s8_x2(const int8_t * ptr) {
- ggml_int8x16x2_t res;
-
- res.val[0] = vld1q_s8(ptr + 0);
- res.val[1] = vld1q_s8(ptr + 16);
-
- return res;
-}
-
-typedef struct ggml_int8x16x4_t {
- int8x16_t val[4];
-} ggml_int8x16x4_t;
-
-inline static ggml_int8x16x4_t ggml_vld1q_s8_x4(const int8_t * ptr) {
- ggml_int8x16x4_t res;
-
- res.val[0] = vld1q_s8(ptr + 0);
- res.val[1] = vld1q_s8(ptr + 16);
- res.val[2] = vld1q_s8(ptr + 32);
- res.val[3] = vld1q_s8(ptr + 48);
-
- return res;
-}
-
-// NOTE: not tested
-inline static int8x16_t ggml_vqtbl1q_s8(int8x16_t a, uint8x16_t b) {
- int8x16_t res;
-
- res[ 0] = a[b[ 0]];
- res[ 1] = a[b[ 1]];
- res[ 2] = a[b[ 2]];
- res[ 3] = a[b[ 3]];
- res[ 4] = a[b[ 4]];
- res[ 5] = a[b[ 5]];
- res[ 6] = a[b[ 6]];
- res[ 7] = a[b[ 7]];
- res[ 8] = a[b[ 8]];
- res[ 9] = a[b[ 9]];
- res[10] = a[b[10]];
- res[11] = a[b[11]];
- res[12] = a[b[12]];
- res[13] = a[b[13]];
- res[14] = a[b[14]];
- res[15] = a[b[15]];
-
- return res;
-}
-
-// NOTE: not tested
-inline static uint8x16_t ggml_vqtbl1q_u8(uint8x16_t a, uint8x16_t b) {
- uint8x16_t res;
-
- res[ 0] = a[b[ 0]];
- res[ 1] = a[b[ 1]];
- res[ 2] = a[b[ 2]];
- res[ 3] = a[b[ 3]];
- res[ 4] = a[b[ 4]];
- res[ 5] = a[b[ 5]];
- res[ 6] = a[b[ 6]];
- res[ 7] = a[b[ 7]];
- res[ 8] = a[b[ 8]];
- res[ 9] = a[b[ 9]];
- res[10] = a[b[10]];
- res[11] = a[b[11]];
- res[12] = a[b[12]];
- res[13] = a[b[13]];
- res[14] = a[b[14]];
- res[15] = a[b[15]];
-
- return res;
-}
-
-#else
-
-#define ggml_int16x8x2_t int16x8x2_t
-#define ggml_uint8x16x2_t uint8x16x2_t
-#define ggml_uint8x16x4_t uint8x16x4_t
-#define ggml_int8x16x2_t int8x16x2_t
-#define ggml_int8x16x4_t int8x16x4_t
-
-#define ggml_vld1q_s16_x2 vld1q_s16_x2
-#define ggml_vld1q_u8_x2 vld1q_u8_x2
-#define ggml_vld1q_u8_x4 vld1q_u8_x4
-#define ggml_vld1q_s8_x2 vld1q_s8_x2
-#define ggml_vld1q_s8_x4 vld1q_s8_x4
-#define ggml_vqtbl1q_s8 vqtbl1q_s8
-#define ggml_vqtbl1q_u8 vqtbl1q_u8
-
-#endif // !defined(__aarch64__)
-
-#if !defined(__ARM_FEATURE_DOTPROD)
-
-inline static int32x4_t ggml_vdotq_s32(int32x4_t acc, int8x16_t a, int8x16_t b) {
- const int16x8_t p0 = vmull_s8(vget_low_s8 (a), vget_low_s8 (b));
- const int16x8_t p1 = vmull_s8(vget_high_s8(a), vget_high_s8(b));
-
- return vaddq_s32(acc, vaddq_s32(vpaddlq_s16(p0), vpaddlq_s16(p1)));
-}
-
-#else
-
-#define ggml_vdotq_s32(a, b, c) vdotq_s32(a, b, c)
-
-#endif // !defined(__ARM_FEATURE_DOTPROD)
-
-#endif // defined(__ARM_NEON)
-
-#if defined(__ARM_NEON) && !defined(_MSC_VER)
-
-#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
-#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
-
-#define GGML_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
-
-static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
- ggml_fp16_internal_t tmp;
- memcpy(&tmp, &h, sizeof(ggml_fp16_t));
- return (float)tmp;
-}
-
-static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
- ggml_fp16_t res;
- ggml_fp16_internal_t tmp = f;
- memcpy(&res, &tmp, sizeof(ggml_fp16_t));
- return res;
-}
-
-#else
-
-#ifdef __wasm_simd128__
-#include <wasm_simd128.h>
-#else
-#ifdef __POWER9_VECTOR__
-#include <altivec.h>
-#undef bool
-#define bool _Bool
-#else
-#if defined(_MSC_VER) || defined(__MINGW32__)
-#include <intrin.h>
-#else
-#if defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__) || defined(__SSSE3__) || defined(__SSE3__) || defined(__SSE__)
-#if !defined(__riscv)
-#include <immintrin.h>
-#endif
-#endif
-#endif
-#endif
-#endif
-
-#ifdef __riscv_v_intrinsic
-#include <riscv_vector.h>
-#endif
-
-#if defined(__loongarch64)
-#if defined(__loongarch_asx)
-#include <lasxintrin.h>
-#endif
-#if defined(__loongarch_sx)
-#include <lsxintrin.h>
-#endif
-#endif
-
-#if defined(__loongarch_asx)
-
-typedef union {
- int32_t i;
- float f;
-} ft_union;
-
-/* float type data load instructions */
-static __m128 __lsx_vreplfr2vr_s(float val) {
- ft_union fi_tmpval = {.f = val};
- return (__m128)__lsx_vreplgr2vr_w(fi_tmpval.i);
-}
-
-static __m256 __lasx_xvreplfr2vr_s(float val) {
- ft_union fi_tmpval = {.f = val};
- return (__m256)__lasx_xvreplgr2vr_w(fi_tmpval.i);
-}
-#endif
-
-#ifdef __F16C__
-
-#ifdef _MSC_VER
-#define GGML_COMPUTE_FP16_TO_FP32(x) _mm_cvtss_f32(_mm_cvtph_ps(_mm_cvtsi32_si128(x)))
-#define GGML_COMPUTE_FP32_TO_FP16(x) _mm_extract_epi16(_mm_cvtps_ph(_mm_set_ss(x), 0), 0)
-#else
-#define GGML_COMPUTE_FP16_TO_FP32(x) _cvtsh_ss(x)
-#define GGML_COMPUTE_FP32_TO_FP16(x) _cvtss_sh(x, 0)
-#endif
-
-#elif defined(__POWER9_VECTOR__)
-
-#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
-#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
-/* the inline asm below is about 12% faster than the lookup method */
-#define GGML_FP16_TO_FP32(x) GGML_COMPUTE_FP16_TO_FP32(x)
-#define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
-
-static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
- register float f;
- register double d;
- __asm__(
- "mtfprd %0,%2\n"
- "xscvhpdp %0,%0\n"
- "frsp %1,%0\n" :
- /* temp */ "=d"(d),
- /* out */ "=f"(f):
- /* in */ "r"(h));
- return f;
-}
-
-static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
- register double d;
- register ggml_fp16_t r;
- __asm__( /* xscvdphp can work on double or single precision */
- "xscvdphp %0,%2\n"
- "mffprd %1,%0\n" :
- /* temp */ "=d"(d),
- /* out */ "=r"(r):
- /* in */ "f"(f));
- return r;
-}
-
-#else
-
-// FP16 <-> FP32
-// ref: https://github.com/Maratyszcza/FP16
-
-static inline float fp32_from_bits(uint32_t w) {
- union {
- uint32_t as_bits;
- float as_value;
- } fp32;
- fp32.as_bits = w;
- return fp32.as_value;
-}
-
-static inline uint32_t fp32_to_bits(float f) {
- union {
- float as_value;
- uint32_t as_bits;
- } fp32;
- fp32.as_value = f;
- return fp32.as_bits;
-}
-
-static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
- const uint32_t w = (uint32_t) h << 16;
- const uint32_t sign = w & UINT32_C(0x80000000);
- const uint32_t two_w = w + w;
-
- const uint32_t exp_offset = UINT32_C(0xE0) << 23;
-#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)
- const float exp_scale = 0x1.0p-112f;
-#else
- const float exp_scale = fp32_from_bits(UINT32_C(0x7800000));
-#endif
- const float normalized_value = fp32_from_bits((two_w >> 4) + exp_offset) * exp_scale;
-
- const uint32_t magic_mask = UINT32_C(126) << 23;
- const float magic_bias = 0.5f;
- const float denormalized_value = fp32_from_bits((two_w >> 17) | magic_mask) - magic_bias;
-
- const uint32_t denormalized_cutoff = UINT32_C(1) << 27;
- const uint32_t result = sign |
- (two_w < denormalized_cutoff ? fp32_to_bits(denormalized_value) : fp32_to_bits(normalized_value));
- return fp32_from_bits(result);
-}
-
-static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
-#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)
- const float scale_to_inf = 0x1.0p+112f;
- const float scale_to_zero = 0x1.0p-110f;
-#else
- const float scale_to_inf = fp32_from_bits(UINT32_C(0x77800000));
- const float scale_to_zero = fp32_from_bits(UINT32_C(0x08800000));
-#endif
- float base = (fabsf(f) * scale_to_inf) * scale_to_zero;
-
- const uint32_t w = fp32_to_bits(f);
- const uint32_t shl1_w = w + w;
- const uint32_t sign = w & UINT32_C(0x80000000);
- uint32_t bias = shl1_w & UINT32_C(0xFF000000);
- if (bias < UINT32_C(0x71000000)) {
- bias = UINT32_C(0x71000000);
- }
-
- base = fp32_from_bits((bias >> 1) + UINT32_C(0x07800000)) + base;
- const uint32_t bits = fp32_to_bits(base);
- const uint32_t exp_bits = (bits >> 13) & UINT32_C(0x00007C00);
- const uint32_t mantissa_bits = bits & UINT32_C(0x00000FFF);
- const uint32_t nonsign = exp_bits + mantissa_bits;
- return (sign >> 16) | (shl1_w > UINT32_C(0xFF000000) ? UINT16_C(0x7E00) : nonsign);
-}
-
-#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
-#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
-
-#endif // __F16C__
-
-#endif // defined(__ARM_NEON) && (!defined(__MSC_VER)
-
-#ifdef __ARM_FEATURE_SVE
-#include <arm_sve.h>
-#endif // __ARM_FEATURE_SVE
-
-// precomputed f32 table for f16 (256 KB)
-// defined in ggml.c, initialized in ggml_init()
-extern float ggml_table_f32_f16[1 << 16];
-
-// On ARM NEON, it's quicker to directly convert x -> x instead of calling into ggml_lookup_fp16_to_fp32,
-// so we define GGML_FP16_TO_FP32 and GGML_FP32_TO_FP16 elsewhere for NEON.
-// This is also true for POWER9.
-#if !defined(GGML_FP16_TO_FP32)
-inline static float ggml_lookup_fp16_to_fp32(ggml_fp16_t f) {
- uint16_t s;
- memcpy(&s, &f, sizeof(uint16_t));
- return ggml_table_f32_f16[s];
-}
-
-#define GGML_FP16_TO_FP32(x) ggml_lookup_fp16_to_fp32(x)
-#endif
-
-#if !defined(GGML_FP32_TO_FP16)
-#define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
-#endif
-
-#ifdef __cplusplus
-}
-#endif
s += "FMA = " + std::to_string(ggml_cpu_has_fma()) + " | ";
s += "NEON = " + std::to_string(ggml_cpu_has_neon()) + " | ";
s += "ARM_FMA = " + std::to_string(ggml_cpu_has_arm_fma()) + " | ";
- s += "METAL = " + std::to_string(ggml_cpu_has_metal()) + " | ";
s += "F16C = " + std::to_string(ggml_cpu_has_f16c()) + " | ";
s += "FP16_VA = " + std::to_string(ggml_cpu_has_fp16_va()) + " | ";
s += "WASM_SIMD = " + std::to_string(ggml_cpu_has_wasm_simd()) + " | ";
- s += "BLAS = " + std::to_string(ggml_cpu_has_blas()) + " | ";
s += "SSE3 = " + std::to_string(ggml_cpu_has_sse3()) + " | ";
s += "SSSE3 = " + std::to_string(ggml_cpu_has_ssse3()) + " | ";
s += "VSX = " + std::to_string(ggml_cpu_has_vsx()) + " | ";
- s += "CUDA = " + std::to_string(ggml_cpu_has_cuda()) + " | ";
s += "COREML = " + std::to_string(whisper_has_coreml()) + " | ";
s += "OPENVINO = " + std::to_string(whisper_has_openvino()) + " | ";
- s += "CANN = " + std::to_string(ggml_cpu_has_cann()) ;
+
return s.c_str();
}