From: Mathieu Baudier Date: Mon, 16 Feb 2026 06:38:20 +0000 (+0100) Subject: Introduce ggml RPC server X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=e5565710dc72d42571eafd2725878ddb9cc80d4c;p=pkg%2Fggml%2Fsources%2Fllama.cpp Introduce ggml RPC server --- diff --git a/debian/changelog b/debian/changelog index 77f3aa02f..dac83a830 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,6 +1,7 @@ llama.cpp (0.0.8067-1) unstable; urgency=medium * Update upstream + * Introduce ggml RPC server -- Mathieu Baudier Mon, 16 Feb 2026 06:10:48 +0000 diff --git a/debian/control b/debian/control index 35beb140b..8ef264390 100644 --- a/debian/control +++ b/debian/control @@ -21,11 +21,9 @@ Architecture: any Multi-Arch: same Pre-Depends: ${misc:Pre-Depends} Depends: libggml0, - libggml-backend-cpu, + libggml0-backend-cpu, ${misc:Depends}, ${shlibs:Depends}, -# Explicitly conflict with Debian official -Conflicts: llama.cpp Description: Inference of large language models in pure C/C++ (shared library) llama.cpp leverages the ggml tensor library in order to run large language models (LLMs) provided in the GGUF file format. @@ -38,8 +36,6 @@ Pre-Depends: ${misc:Pre-Depends} Depends: libllama0 (= ${binary:Version}), ${misc:Depends}, ${shlibs:Depends}, -# Explicitly conflict with Debian official -Conflicts: llama.cpp Description: Inference of large language models in pure C/C++ (multimodal library) mtmd provides multimodal inference. @@ -55,6 +51,10 @@ Description: Inference of large language models in pure C/C++ (tools) It typically allows one to run one-shot prompts or to "chat" with a large language model. . + llama-completion: simple tool for text-only inference. + . + llama-mtmd-cli: simple tool for vision inference. + . llama-quantize: utility to "quantize" a large language model GGUF file. Quantizing is the process of reducing the precision of the underlying neural-network at aminimal cost to its accuracy. 
@@ -64,6 +64,15 @@ Description: Inference of large language models in pure C/C++ (tools)
  .
  llama-server: HTTP server support.
 
+# This should actually be in ggml
+Package: llama.cpp-rpc
+Architecture: any
+Depends: libggml0-backend-rpc,
+         ${misc:Depends},
+         ${shlibs:Depends},
+Description: Inference of large language models in pure C/C++ (RPC server)
+ ggml-rpc-server: ggml RPC server.
+
 Package: libllama-dev
 Section: libdevel
 Architecture: any
diff --git a/debian/llama.cpp-rpc.install b/debian/llama.cpp-rpc.install
new file mode 100644
index 000000000..0760fb5c4
--- /dev/null
+++ b/debian/llama.cpp-rpc.install
@@ -0,0 +1,2 @@
+/usr/bin/ggml-rpc-server
+/usr/libexec/*/ggml/ggml-rpc-server
diff --git a/debian/rules b/debian/rules
index 9ccc5bee2..84c822df0 100755
--- a/debian/rules
+++ b/debian/rules
@@ -79,6 +79,11 @@ execute_after_dh_auto_install-arch:
 		[ "$$progname_base" != "llama-cli" ] || continue ; \
 		ln -r -s completions/llama-cli completions/$$progname_base; \
 	done
+
+	# RPC server: ship the binary under libexec as ggml-rpc-server, with a relative symlink in /usr/bin
+	mkdir -p debian/tmp/usr/libexec/$(DEB_TARGET_MULTIARCH)/ggml
+	mv debian/tmp/usr/bin/rpc-server debian/tmp/usr/libexec/$(DEB_TARGET_MULTIARCH)/ggml/ggml-rpc-server
+	ln -s --relative -t debian/tmp/usr/bin debian/tmp/usr/libexec/$(DEB_TARGET_MULTIARCH)/ggml/ggml-rpc-server
 
 # No tests for now, as many need some kind of model we don't have
 override_dh_auto_test: