git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
server: use httplib dynamic threads (#20817)
author Xuan-Son Nguyen <redacted>
Mon, 23 Mar 2026 11:22:46 +0000 (12:22 +0100)
committer GitHub <redacted>
Mon, 23 Mar 2026 11:22:46 +0000 (12:22 +0100)
* server: use httplib dynamic threads

* change to n_threads_http + 1024

tools/server/server-http.cpp

index 129022a7119696fb40b2809fa544101fb06aa9b5..3466512d0c3d2b7bf448f6c26234695316a1face 100644 (file)
@@ -227,11 +227,17 @@ bool server_http_context::init(const common_params & params) {
 
     int n_threads_http = params.n_threads_http;
     if (n_threads_http < 1) {
-        // +2 threads for monitoring endpoints
-        n_threads_http = std::max(params.n_parallel + 2, (int32_t) std::thread::hardware_concurrency() - 1);
+        // +4 threads for monitoring, health and some threads reserved for MCP and other tasks in the future
+        n_threads_http = std::max(params.n_parallel + 4, (int32_t) std::thread::hardware_concurrency() - 1);
     }
     LOG_INF("%s: using %d threads for HTTP server\n", __func__, n_threads_http);
-    srv->new_task_queue = [n_threads_http] { return new httplib::ThreadPool(n_threads_http); };
+    srv->new_task_queue = [n_threads_http] {
+        // spawn n_threads_http fixed threads (always alive), while allowing up to 1024 additional threads
+        // when all n_threads_http fixed threads are in use, the server will create new "dynamic" threads that will be destroyed after processing each request
+        // ref: https://github.com/yhirose/cpp-httplib/pull/2368
+        size_t max_threads = (size_t)n_threads_http + 1024;
+        return new httplib::ThreadPool(n_threads_http, max_threads);
+    };
 
     //
     // Web UI setup