server: Add ability to mount server at prefix (#14544)

author Alawode Oluwandabira <redacted>

Tue, 8 Jul 2025 08:47:33 +0000 (11:47 +0300)

committer GitHub <redacted>

Tue, 8 Jul 2025 08:47:33 +0000 (11:47 +0300)
author Alawode Oluwandabira <redacted>
Tue, 8 Jul 2025 08:47:33 +0000 (11:47 +0300)
committer GitHub <redacted>
Tue, 8 Jul 2025 08:47:33 +0000 (11:47 +0300)
diff --git a/common/arg.cpp b/common/arg.cpp

index 40af7e574830f6b1d08f25d996ec7331612ceda6..56827a65908beccfb084f678a4d68017492ceb27 100644 (file)
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -2734,6 +2734,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
              params.public_path = value;
          }
      ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_STATIC_PATH"));
+    add_opt(common_arg(
+        {"--api-prefix"}, "PREFIX",
+        string_format("prefix path the server serves from, without the trailing slash (default: %s)", params.api_prefix.c_str()),
+        [](common_params & params, const std::string & value) {
+            params.api_prefix = value;
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_API_PREFIX"));
      add_opt(common_arg(
          {"--no-webui"},
          string_format("Disable the Web UI (default: %s)", params.webui ? "enabled" : "disabled"),
diff --git a/common/common.h b/common/common.h

index 8922090e7b10d50e52f86f9851afdfbdb27c5015..a5abe32859fdd546e0b6ebf14678ac5094e6aa0b 100644 (file)
--- a/common/common.h
+++ b/common/common.h
@@ -370,6 +370,7 @@ struct common_params {
  
      std::string hostname      = "127.0.0.1";
      std::string public_path   = "";                                                                         // NOLINT
+    std::string api_prefix    = "";                                                                         // NOLINT
      std::string chat_template = "";                                                                         // NOLINT
      bool use_jinja = false;                                                                                 // NOLINT
      bool enable_chat_template = true;
diff --git a/tools/server/server.cpp b/tools/server/server.cpp

index d3f6271931f62c5b16323e886fc9bea28d35b3af..57b917f2f97b382d92081ef8c9012d105d18feed 100644 (file)
--- a/tools/server/server.cpp
+++ b/tools/server/server.cpp
@@ -4806,14 +4806,14 @@ int main(int argc, char ** argv) {
          // register static assets routes
          if (!params.public_path.empty()) {
              // Set the base directory for serving static files
-            bool is_found = svr->set_mount_point("/", params.public_path);
+            bool is_found = svr->set_mount_point(params.api_prefix + "/", params.public_path);
              if (!is_found) {
                  LOG_ERR("%s: static assets path not found: %s\n", __func__, params.public_path.c_str());
                  return 1;
              }
          } else {
              // using embedded static index.html
-            svr->Get("/", [](const httplib::Request & req, httplib::Response & res) {
+            svr->Get(params.api_prefix + "/", [](const httplib::Request & req, httplib::Response & res) {
                  if (req.get_header_value("Accept-Encoding").find("gzip") == std::string::npos) {
                      res.set_content("Error: gzip is not supported by this browser", "text/plain");
                  } else {
@@ -4829,37 +4829,37 @@ int main(int argc, char ** argv) {
      }
  
      // register API routes
-    svr->Get ("/health",              handle_health); // public endpoint (no API key check)
-    svr->Get ("/metrics",             handle_metrics);
-    svr->Get ("/props",               handle_props);
-    svr->Post("/props",               handle_props_change);
-    svr->Post("/api/show",            handle_api_show);
-    svr->Get ("/models",              handle_models); // public endpoint (no API key check)
-    svr->Get ("/v1/models",           handle_models); // public endpoint (no API key check)
-    svr->Get ("/api/tags",            handle_models); // ollama specific endpoint. public endpoint (no API key check)
-    svr->Post("/completion",          handle_completions); // legacy
-    svr->Post("/completions",         handle_completions);
-    svr->Post("/v1/completions",      handle_completions_oai);
-    svr->Post("/chat/completions",    handle_chat_completions);
-    svr->Post("/v1/chat/completions", handle_chat_completions);
-    svr->Post("/api/chat",            handle_chat_completions); // ollama specific endpoint
-    svr->Post("/infill",              handle_infill);
-    svr->Post("/embedding",           handle_embeddings); // legacy
-    svr->Post("/embeddings",          handle_embeddings);
-    svr->Post("/v1/embeddings",       handle_embeddings_oai);
-    svr->Post("/rerank",              handle_rerank);
-    svr->Post("/reranking",           handle_rerank);
-    svr->Post("/v1/rerank",           handle_rerank);
-    svr->Post("/v1/reranking",        handle_rerank);
-    svr->Post("/tokenize",            handle_tokenize);
-    svr->Post("/detokenize",          handle_detokenize);
-    svr->Post("/apply-template",      handle_apply_template);
+    svr->Get (params.api_prefix + "/health",              handle_health); // public endpoint (no API key check)
+    svr->Get (params.api_prefix + "/metrics",             handle_metrics);
+    svr->Get (params.api_prefix + "/props",               handle_props);
+    svr->Post(params.api_prefix + "/props",               handle_props_change);
+    svr->Post(params.api_prefix + "/api/show",            handle_api_show);
+    svr->Get (params.api_prefix + "/models",              handle_models); // public endpoint (no API key check)
+    svr->Get (params.api_prefix + "/v1/models",           handle_models); // public endpoint (no API key check)
+    svr->Get (params.api_prefix + "/api/tags",            handle_models); // ollama specific endpoint. public endpoint (no API key check)
+    svr->Post(params.api_prefix + "/completion",          handle_completions); // legacy
+    svr->Post(params.api_prefix + "/completions",         handle_completions);
+    svr->Post(params.api_prefix + "/v1/completions",      handle_completions_oai);
+    svr->Post(params.api_prefix + "/chat/completions",    handle_chat_completions);
+    svr->Post(params.api_prefix + "/v1/chat/completions", handle_chat_completions);
+    svr->Post(params.api_prefix + "/api/chat",            handle_chat_completions); // ollama specific endpoint
+    svr->Post(params.api_prefix + "/infill",              handle_infill);
+    svr->Post(params.api_prefix + "/embedding",           handle_embeddings); // legacy
+    svr->Post(params.api_prefix + "/embeddings",          handle_embeddings);
+    svr->Post(params.api_prefix + "/v1/embeddings",       handle_embeddings_oai);
+    svr->Post(params.api_prefix + "/rerank",              handle_rerank);
+    svr->Post(params.api_prefix + "/reranking",           handle_rerank);
+    svr->Post(params.api_prefix + "/v1/rerank",           handle_rerank);
+    svr->Post(params.api_prefix + "/v1/reranking",        handle_rerank);
+    svr->Post(params.api_prefix + "/tokenize",            handle_tokenize);
+    svr->Post(params.api_prefix + "/detokenize",          handle_detokenize);
+    svr->Post(params.api_prefix + "/apply-template",      handle_apply_template);
      // LoRA adapters hotswap
-    svr->Get ("/lora-adapters",       handle_lora_adapters_list);
-    svr->Post("/lora-adapters",       handle_lora_adapters_apply);
+    svr->Get (params.api_prefix + "/lora-adapters",       handle_lora_adapters_list);
+    svr->Post(params.api_prefix + "/lora-adapters",       handle_lora_adapters_apply);
      // Save & load slots
-    svr->Get ("/slots",               handle_slots);
-    svr->Post("/slots/:id_slot",      handle_slots_action);
+    svr->Get (params.api_prefix + "/slots",               handle_slots);
+    svr->Post(params.api_prefix + "/slots/:id_slot",      handle_slots_action);
  
      //
      // Start the server
author	Alawode Oluwandabira <redacted>
	Tue, 8 Jul 2025 08:47:33 +0000 (11:47 +0300)
committer	GitHub <redacted>
	Tue, 8 Jul 2025 08:47:33 +0000 (11:47 +0300)
common/arg.cpp		patch | blob | history
common/common.h		patch | blob | history
tools/server/server.cpp		patch | blob | history