From: Xuan-Son Nguyen
Date: Mon, 23 Mar 2026 11:22:46 +0000 (+0100)
Subject: server: use httplib dynamic threads (#20817)
X-Git-Tag: upstream/0.0.8611~126
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=31a5cf4c3f5d3af7f16fc4abc9baa75f8d568421;p=pkg%2Fggml%2Fsources%2Fllama.cpp

server: use httplib dynamic threads (#20817)

* server: use httplib dynamic threads

* change to n_threads_http + 1024
---

diff --git a/tools/server/server-http.cpp b/tools/server/server-http.cpp
index 129022a71..3466512d0 100644
--- a/tools/server/server-http.cpp
+++ b/tools/server/server-http.cpp
@@ -227,11 +227,17 @@ bool server_http_context::init(const common_params & params) {
 
     int n_threads_http = params.n_threads_http;
     if (n_threads_http < 1) {
-        // +2 threads for monitoring endpoints
-        n_threads_http = std::max(params.n_parallel + 2, (int32_t) std::thread::hardware_concurrency() - 1);
+        // +4 threads for monitoring, health and some threads reserved for MCP and other tasks in the future
+        n_threads_http = std::max(params.n_parallel + 4, (int32_t) std::thread::hardware_concurrency() - 1);
     }
     LOG_INF("%s: using %d threads for HTTP server\n", __func__, n_threads_http);
-    srv->new_task_queue = [n_threads_http] { return new httplib::ThreadPool(n_threads_http); };
+    srv->new_task_queue = [n_threads_http] {
+        // spawn n_threads_http fixed thread (always alive), while allow up to 1024 max possible additional threads
+        // when n_threads_http is used, server will create new "dynamic" threads that will be destroyed after processing each request
+        // ref: https://github.com/yhirose/cpp-httplib/pull/2368
+        size_t max_threads = (size_t)n_threads_http + 1024;
+        return new httplib::ThreadPool(n_threads_http, max_threads);
+    };
 
     //
     // Web UI setup