From: Mathieu Baudier <redacted>
Date: Mon, 16 Feb 2026 06:38:20 +0000 (+0100)
Subject: Introduce ggml RPC server
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=e5565710dc72d42571eafd2725878ddb9cc80d4c;p=pkg%2Fggml%2Fsources%2Fllama.cpp

Introduce ggml RPC server
---

diff --git a/debian/changelog b/debian/changelog
index 77f3aa02f..dac83a830 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,6 +1,7 @@
 llama.cpp (0.0.8067-1) unstable; urgency=medium
 
   * Update upstream
+  * Introduce ggml RPC server
 
  -- Mathieu Baudier <mbaudier@argeo.org>  Mon, 16 Feb 2026 06:10:48 +0000
 
diff --git a/debian/control b/debian/control
index 35beb140b..8ef264390 100644
--- a/debian/control
+++ b/debian/control
@@ -21,11 +21,9 @@ Architecture: any
 Multi-Arch: same
 Pre-Depends: ${misc:Pre-Depends}
 Depends: libggml0,
-         libggml-backend-cpu,
+         libggml0-backend-cpu,
          ${misc:Depends},
          ${shlibs:Depends},
-# Explicitly conflict with Debian official
-Conflicts: llama.cpp
 Description: Inference of large language models in pure C/C++ (shared library)
  llama.cpp leverages the ggml tensor library in order to run
  large language models (LLMs) provided in the GGUF file format.
@@ -38,8 +36,6 @@ Pre-Depends: ${misc:Pre-Depends}
 Depends: libllama0 (= ${binary:Version}),
          ${misc:Depends},
          ${shlibs:Depends},
-# Explicitly conflict with Debian official
-Conflicts: llama.cpp
 Description: Inference of large language models in pure C/C++ (multimodal library)
  mtmd provides multimodal inference.
 
@@ -55,6 +51,10 @@ Description: Inference of large language models in pure C/C++ (tools)
  It typically allows one to run one-shot prompts or to "chat"
  with a large language model.
  .
+ llama-completion: simple tool for text-only inference.
+ .
+ llama-mtmd-cli: simple tool for vision inference.
+ .
  llama-quantize: utility to "quantize" a large language model
  GGUF file. Quantizing is the process of reducing the precision of
  the underlying neural-network at  aminimal cost to its accuracy.
@@ -64,6 +64,15 @@ Description: Inference of large language models in pure C/C++ (tools)
  .
  llama-server: HTTP server support.
 
+# TODO: this package ships the ggml RPC server and should actually live in ggml, not in llama.cpp
+Package: llama.cpp-rpc
+Architecture: any
+Depends: libggml0-backend-rpc,
+         ${misc:Depends},
+         ${shlibs:Depends},
+Description: Inference of large language models in pure C/C++ (RPC server)
+ ggml-rpc-server: ggml RPC server.
+
 Package: libllama-dev
 Section: libdevel
 Architecture: any
diff --git a/debian/llama.cpp-rpc.install b/debian/llama.cpp-rpc.install
new file mode 100644
index 000000000..0760fb5c4
--- /dev/null
+++ b/debian/llama.cpp-rpc.install
@@ -0,0 +1,2 @@
+/usr/bin/ggml-rpc-server
+/usr/libexec/*/ggml/ggml-rpc-server
diff --git a/debian/rules b/debian/rules
index 9ccc5bee2..84c822df0 100755
--- a/debian/rules
+++ b/debian/rules
@@ -79,6 +79,10 @@ execute_after_dh_auto_install-arch:
 		[ "$$progname_base" != "llama-cli" ] || continue ; \
 		ln -r -s completions/llama-cli completions/$$progname_base; \
 	done
+	
+	# RPC server: move rpc-server to libexec as ggml-rpc-server, then symlink it into usr/bin
+	mv debian/tmp/usr/bin/rpc-server debian/tmp/usr/libexec/$(DEB_TARGET_MULTIARCH)/ggml/ggml-rpc-server
+	ln -s --relative -t debian/tmp/usr/bin debian/tmp/usr/libexec/$(DEB_TARGET_MULTIARCH)/ggml/ggml-rpc-server
 
 # No tests for now, as many need some kind of model we don't have
 override_dh_auto_test: