Multi-Arch: same
Pre-Depends: ${misc:Pre-Depends}
Depends: libggml0,
- libggml-backend-cpu,
+ libggml0-backend-cpu,
${misc:Depends},
${shlibs:Depends},
-# Explicitly conflict with Debian official
-Conflicts: llama.cpp
Description: Inference of large language models in pure C/C++ (shared library)
llama.cpp leverages the ggml tensor library in order to run
large language models (LLMs) provided in the GGUF file format.
Depends: libllama0 (= ${binary:Version}),
${misc:Depends},
${shlibs:Depends},
-# Explicitly conflict with Debian official
-Conflicts: llama.cpp
Description: Inference of large language models in pure C/C++ (multimodal library)
mtmd provides multimodal inference.
It typically allows one to run one-shot prompts or to "chat"
with a large language model.
.
+ llama-completion: simple tool for text-only inference.
+ .
+ llama-mtmd-cli: simple tool for vision inference.
+ .
llama-quantize: utility to "quantize" a large language model
GGUF file. Quantizing is the process of reducing the precision of
 the underlying neural-network at a minimal cost to its accuracy.
.
llama-server: HTTP server support.
+# This should actually be in ggml
+Package: llama.cpp-rpc
+Architecture: any
+Depends: libggml0-backend-rpc,
+ ${misc:Depends},
+ ${shlibs:Depends},
+Description: Inference of large language models in pure C/C++ (RPC server)
+ ggml-rpc-server: ggml RPC server.
+
Package: libllama-dev
Section: libdevel
Architecture: any
[ "$$progname_base" != "llama-cli" ] || continue ; \
ln -r -s completions/llama-cli completions/$$progname_base; \
done
+
+ # RPC server
+ mv debian/tmp/usr/bin/rpc-server debian/tmp/usr/libexec/$(DEB_TARGET_MULTIARCH)/ggml/ggml-rpc-server
+ ln -s --relative -t debian/tmp/usr/bin debian/tmp/usr/libexec/$(DEB_TARGET_MULTIARCH)/ggml/ggml-rpc-server
# No tests for now, as many need some kind of model we don't have
override_dh_auto_test: