Introduce ggml RPC server

author Mathieu Baudier <redacted>

Mon, 16 Feb 2026 06:38:20 +0000 (07:38 +0100)

committer Mathieu Baudier <redacted>

Mon, 16 Feb 2026 06:46:45 +0000 (07:46 +0100)
author Mathieu Baudier <redacted>
Mon, 16 Feb 2026 06:38:20 +0000 (07:38 +0100)
committer Mathieu Baudier <redacted>
Mon, 16 Feb 2026 06:46:45 +0000 (07:46 +0100)
diff --git a/debian/changelog b/debian/changelog

index 77f3aa02fd071eb8f060e68fc6e68d33d5e318f9..dac83a830f6d0731cd46274d8e12c5cf4803dc65 100644 (file)
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,6 +1,7 @@
  llama.cpp (0.0.8067-1) unstable; urgency=medium
  
    * Update upstream
+  * Introduce ggml RPC server
  
   -- Mathieu Baudier <mbaudier@argeo.org>  Mon, 16 Feb 2026 06:10:48 +0000
  
diff --git a/debian/control b/debian/control

index 35beb140b289b1ec9fcffff2ff0ee36e2662ff9c..8ef264390e2686464caac55cbdc5cdbbcb0f5500 100644 (file)
--- a/debian/control
+++ b/debian/control
@@ -21,11 +21,9 @@ Architecture: any
  Multi-Arch: same
  Pre-Depends: ${misc:Pre-Depends}
  Depends: libggml0,
-         libggml-backend-cpu,
+         libggml0-backend-cpu,
           ${misc:Depends},
           ${shlibs:Depends},
-# Explicitly conflict with Debian official
-Conflicts: llama.cpp
  Description: Inference of large language models in pure C/C++ (shared library)
   llama.cpp leverages the ggml tensor library in order to run
   large language models (LLMs) provided in the GGUF file format.
@@ -38,8 +36,6 @@ Pre-Depends: ${misc:Pre-Depends}
  Depends: libllama0 (= ${binary:Version}),
           ${misc:Depends},
           ${shlibs:Depends},
-# Explicitly conflict with Debian official
-Conflicts: llama.cpp
  Description: Inference of large language models in pure C/C++ (multimodal library)
   mtmd provides multimodal inference.
  
@@ -55,6 +51,10 @@ Description: Inference of large language models in pure C/C++ (tools)
   It typically allows one to run one-shot prompts or to "chat"
   with a large language model.
   .
+ llama-completion: simple tool for text-only inference.
+ .
+ llama-mtmd-cli: simple tool for vision inference.
+ .
   llama-quantize: utility to "quantize" a large language model
   GGUF file. Quantizing is the process of reducing the precision of
   the underlying neural-network at a minimal cost to its accuracy.
@@ -64,6 +64,15 @@ Description: Inference of large language models in pure C/C++ (tools)
   .
   llama-server: HTTP server support.
  
+# TODO: this package arguably belongs in the ggml source package, since it only ships ggml-rpc-server
+Package: llama.cpp-rpc
+Architecture: any
+Depends: libggml0-backend-rpc,
+         ${misc:Depends},
+         ${shlibs:Depends},
+Description: Inference of large language models in pure C/C++ (RPC server)
+ ggml-rpc-server: ggml RPC server.
+
  Package: libllama-dev
  Section: libdevel
  Architecture: any
diff --git a/debian/llama.cpp-rpc.install b/debian/llama.cpp-rpc.install

new file mode 100644 (file)

index 0000000..0760fb5
--- /dev/null
+++ b/debian/llama.cpp-rpc.install
@@ -0,0 +1,2 @@
+/usr/bin/ggml-rpc-server
+/usr/libexec/*/ggml/ggml-rpc-server
diff --git a/debian/rules b/debian/rules

index 9ccc5bee28450e862d5da2b25f7c065e12090e98..84c822df0e78e7dbe8d6e9c1f2b8f925f74d8607 100755 (executable)
--- a/debian/rules
+++ b/debian/rules
@@ -79,6 +79,10 @@ execute_after_dh_auto_install-arch:
                 [ "$$progname_base" != "llama-cli" ] || continue ; \
                 ln -r -s completions/llama-cli completions/$$progname_base; \
         done
+       
+       # RPC server
+       mv debian/tmp/usr/bin/rpc-server debian/tmp/usr/libexec/$(DEB_TARGET_MULTIARCH)/ggml/ggml-rpc-server
+       ln -s --relative -t debian/tmp/usr/bin debian/tmp/usr/libexec/$(DEB_TARGET_MULTIARCH)/ggml/ggml-rpc-server
  
  # No tests for now, as many need some kind of model we don't have
  override_dh_auto_test:
author	Mathieu Baudier <redacted>
	Mon, 16 Feb 2026 06:38:20 +0000 (07:38 +0100)
committer	Mathieu Baudier <redacted>
	Mon, 16 Feb 2026 06:46:45 +0000 (07:46 +0100)
debian/changelog		patch | blob | history
debian/control		patch | blob | history
debian/llama.cpp-rpc.install	[new file with mode: 0644]	patch | blob
debian/rules		patch | blob | history