server : passthrough the /models endpoint during loading (#13535)

author Georgi Gerganov <redacted>

Wed, 14 May 2025 12:42:10 +0000 (15:42 +0300)

committer GitHub <redacted>

Wed, 14 May 2025 12:42:10 +0000 (15:42 +0300)
author Georgi Gerganov <redacted>
Wed, 14 May 2025 12:42:10 +0000 (15:42 +0300)
committer GitHub <redacted>
Wed, 14 May 2025 12:42:10 +0000 (15:42 +0300)
diff --git a/tools/server/README.md b/tools/server/README.md

index 7b944c35ba479b956093691ff0762402ec8bab7a..17ad93df61f87fa125a9eade12df66e7ab53e892 100644 (file)
--- a/tools/server/README.md
+++ b/tools/server/README.md
@@ -1040,7 +1040,7 @@ To know the `id` of the adapter, use GET `/lora-adapters`
  
  Returns information about the loaded model. See [OpenAI Models API documentation](https://platform.openai.com/docs/api-reference/models).
  
-The returned list always has one single element.
+The returned list always has one single element. The `meta` field can be `null` (for example, while the model is still loading).
  
  By default, model `id` field is the path to model file, specified via `-m`. You can set a custom value for model `id` field via `--alias` argument. For example, `--alias gpt-4o-mini`.
  
diff --git a/tools/server/server.cpp b/tools/server/server.cpp

index a9b99d437e2fd237912929b2399400052197e39a..d81579eb1028f60aa719ec4edae00e6b206b3e2f 100644 (file)
--- a/tools/server/server.cpp
+++ b/tools/server/server.cpp
@@ -3707,6 +3707,9 @@ int main(int argc, char ** argv) {
              if (req.path == "/" || tmp.back() == "html") {
                  res.set_content(reinterpret_cast<const char*>(loading_html), loading_html_len, "text/html; charset=utf-8");
                  res.status = 503;
+            } else if (req.path == "/models" || req.path == "/v1/models") {
+                // allow the models endpoint to be accessed during loading
+                return true;
              } else {
                  res_error(res, format_error_response("Loading model", ERROR_TYPE_UNAVAILABLE));
              }
@@ -4365,7 +4368,13 @@ int main(int argc, char ** argv) {
          res_ok(res, {{ "prompt", std::move(data.at("prompt")) }});
      };
  
-    const auto handle_models = [&params, &ctx_server, &res_ok](const httplib::Request &, httplib::Response & res) {
+    const auto handle_models = [&params, &ctx_server, &state, &res_ok](const httplib::Request &, httplib::Response & res) {
+        server_state current_state = state.load();
+        json model_meta = nullptr;
+        if (current_state == SERVER_STATE_READY) {
+            model_meta = ctx_server.model_meta();
+        }
+
          json models = {
              {"object", "list"},
              {"data", {
@@ -4374,7 +4383,7 @@ int main(int argc, char ** argv) {
                      {"object",   "model"},
                      {"created",  std::time(0)},
                      {"owned_by", "llamacpp"},
-                    {"meta",     ctx_server.model_meta()}
+                    {"meta",     model_meta},
                  },
               }}
          };
author	Georgi Gerganov <redacted>
	Wed, 14 May 2025 12:42:10 +0000 (15:42 +0300)
committer	GitHub <redacted>
	Wed, 14 May 2025 12:42:10 +0000 (15:42 +0300)
tools/server/README.md		patch | blob | history
tools/server/server.cpp		patch | blob | history