server: add --media-path for local media files (#17697)

author Xuan-Son Nguyen <redacted>

Tue, 2 Dec 2025 21:49:20 +0000 (22:49 +0100)

committer GitHub <redacted>

Tue, 2 Dec 2025 21:49:20 +0000 (22:49 +0100)
author Xuan-Son Nguyen <redacted>
Tue, 2 Dec 2025 21:49:20 +0000 (22:49 +0100)
committer GitHub <redacted>
Tue, 2 Dec 2025 21:49:20 +0000 (22:49 +0100)
diff --git a/common/arg.cpp b/common/arg.cpp

index 79b7e23ec3ce200abfe6f8578b5808fc57f3c1fd..801f06e37c2f32726868fbb0d2e7d1ddc012f841 100644 (file)
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -2488,12 +2488,29 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
          "path to save slot kv cache (default: disabled)",
          [](common_params & params, const std::string & value) {
              params.slot_save_path = value;
+            if (!fs_is_directory(params.slot_save_path)) {
+                throw std::invalid_argument("not a directory: " + value);
+            }
              // if doesn't end with DIRECTORY_SEPARATOR, add it
              if (!params.slot_save_path.empty() && params.slot_save_path[params.slot_save_path.size() - 1] != DIRECTORY_SEPARATOR) {
                  params.slot_save_path += DIRECTORY_SEPARATOR;
              }
          }
      ).set_examples({LLAMA_EXAMPLE_SERVER}));
+    // --media-path: directory from which the server may load media files referenced by file:// URLs
+    add_opt(common_arg(
+        {"--media-path"}, "PATH",
+        "directory for loading local media files; files can be accessed via file:// URLs using relative paths (default: disabled)",
+        [](common_params & params, const std::string & value) {
+            params.media_path = value;
+            // the value must name an existing directory, otherwise argument parsing fails
+            if (!fs_is_directory(params.media_path)) {
+                throw std::invalid_argument("not a directory: " + value);
+            }
+            // if it doesn't end with DIRECTORY_SEPARATOR, add it
+            // (the server builds the final file name by appending the requested relative path to media_path)
+            if (!params.media_path.empty() && params.media_path[params.media_path.size() - 1] != DIRECTORY_SEPARATOR) {
+                params.media_path += DIRECTORY_SEPARATOR;
+            }
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER}));
      add_opt(common_arg(
          {"--models-dir"}, "PATH",
          "directory containing models for the router server (default: disabled)",
diff --git a/common/common.cpp b/common/common.cpp

index 10001f54697d788fb30bd89b54e25465bd2b00a8..93e1dcfb9cb11b740a203210f6035375a4a63818 100644 (file)
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -694,7 +694,7 @@ bool string_parse_kv_override(const char * data, std::vector<llama_model_kv_over
  
  // Validate if a filename is safe to use
  // To validate a full path, split the path by the OS-specific path separator, and validate each part with this function
-bool fs_validate_filename(const std::string & filename) {
+bool fs_validate_filename(const std::string & filename, bool allow_subdirs) {
      if (!filename.length()) {
          // Empty filename invalid
          return false;
@@ -754,10 +754,14 @@ bool fs_validate_filename(const std::string & filename) {
              || (c >= 0xD800 && c <= 0xDFFF) // UTF-16 surrogate pairs
              || c == 0xFFFD // Replacement Character (UTF-8)
              || c == 0xFEFF // Byte Order Mark (BOM)
-            || c == '/' || c == '\\' || c == ':' || c == '*' // Illegal characters
+            || c == ':' || c == '*' // Illegal characters
              || c == '?' || c == '"' || c == '<' || c == '>' || c == '|') {
              return false;
          }
+        if (!allow_subdirs && (c == '/' || c == '\\')) {
+            // Subdirectories not allowed, reject path separators
+            return false;
+        }
      }
  
      // Reject any leading or trailing ' ', or any trailing '.', these are stripped on Windows and will cause a different filename
@@ -859,6 +863,11 @@ bool fs_create_directory_with_parents(const std::string & path) {
  #endif // _WIN32
  }
  
+// Check whether `path` refers to an existing directory.
+// Uses the non-throwing std::filesystem overload: a path that does not exist, or whose status
+// cannot be queried, yields false instead of a std::filesystem::filesystem_error.
+bool fs_is_directory(const std::string & path) {
+    std::error_code ec;
+    return std::filesystem::is_directory(std::filesystem::path(path), ec);
+}
+
  std::string fs_get_cache_directory() {
      std::string cache_directory = "";
      auto ensure_trailing_slash = [](std::string p) {
diff --git a/common/common.h b/common/common.h

index cdca5e26a2314802f7e82744120a002df1d85f5c..6e6b2c1cab69d0128e535df2ebc47ca580131bb7 100644 (file)
--- a/common/common.h
+++ b/common/common.h
@@ -485,6 +485,7 @@ struct common_params {
      bool log_json = false;
  
      std::string slot_save_path;
+    std::string media_path; // path to directory for loading media files
  
      float slot_prompt_similarity = 0.1f;
  
@@ -635,8 +636,9 @@ std::string string_from(const struct llama_context * ctx, const struct llama_bat
  // Filesystem utils
  //
  
-bool fs_validate_filename(const std::string & filename);
+bool fs_validate_filename(const std::string & filename, bool allow_subdirs = false);
  bool fs_create_directory_with_parents(const std::string & path);
+bool fs_is_directory(const std::string & path);
  
  std::string fs_get_cache_directory();
  std::string fs_get_cache_file(const std::string & filename);
diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp

index 6a42c3926ae02646783ad9d3afafdd6f6b64e3ba..e2e41a0d51bdca13ac91b4286c9f3988d5d9726f 100644 (file)
--- a/tools/server/server-common.cpp
+++ b/tools/server/server-common.cpp
@@ -11,6 +11,7 @@
  
  #include <random>
  #include <sstream>
+#include <fstream>
  
  json format_error_response(const std::string & message, const enum error_type type) {
      std::string type_str;
@@ -774,6 +775,65 @@ json oaicompat_completion_params_parse(const json & body) {
      return llama_params;
  }
  
+// Resolve one media reference (the "url" field of `media_obj`) into raw bytes and append them to `out_files`.
+//
+// Accepted forms of the url:
+//   - starts with "http"    : downloaded with common_remote_get_content() (max_size 10MB, timeout 10 seconds)
+//   - starts with "file://" : read from disk relative to `media_path`; refused when `media_path` is empty
+//   - anything else         : must be a "data:image/...base64,<payload>" URL, the payload is base64-decoded
+//
+// media_path always ends with '/', see arg.cpp
+//
+// Throws std::invalid_argument when the request itself is at fault (ex_wrapper in server.cpp reports it
+// as an invalid request, 400) and std::runtime_error when the remote download fails (server error, 500).
+static void handle_media(
+        std::vector<raw_buffer> & out_files,
+        json & media_obj,
+        const std::string & media_path) {
+    const std::string url = json_value(media_obj, "url", std::string());
+    if (string_starts_with(url, "http")) {
+        // download remote image
+        // TODO @ngxson : maybe make these params configurable
+        common_remote_params params;
+        params.headers.push_back("User-Agent: llama.cpp/" + build_info);
+        params.max_size = 1024 * 1024 * 10; // 10MB
+        params.timeout  = 10; // seconds
+        SRV_INF("downloading image from '%s'\n", url.c_str());
+        auto res = common_remote_get_content(url, params);
+        if (200 <= res.first && res.first < 300) {
+            // size() is a size_t, so %zu is the matching conversion
+            SRV_INF("downloaded %zu bytes\n", res.second.size());
+            raw_buffer data;
+            data.insert(data.end(), res.second.begin(), res.second.end());
+            out_files.push_back(std::move(data));
+        } else {
+            throw std::runtime_error("Failed to download image");
+        }
+
+    } else if (string_starts_with(url, "file://")) {
+        if (media_path.empty()) {
+            throw std::invalid_argument("file:// URLs are not allowed unless --media-path is specified");
+        }
+        // load local image file
+        const std::string file_path = url.substr(7); // remove "file://"
+        // the path comes from the client: validate it before touching the filesystem
+        if (!fs_validate_filename(file_path, true)) {
+            throw std::invalid_argument("file path is not allowed: " + file_path);
+        }
+        const std::string full_path = media_path + file_path;
+        SRV_INF("loading image from local file '%s'\n", full_path.c_str());
+        std::ifstream file(full_path, std::ios::binary);
+        if (!file) {
+            throw std::invalid_argument("file does not exist or cannot be opened: " + file_path);
+        }
+        raw_buffer data;
+        data.assign((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
+        out_files.push_back(std::move(data));
+
+    } else {
+        // try to decode base64 image
+        // a malformed data URL is the client's mistake: throw invalid_argument so it is answered with 400
+        std::vector<std::string> parts = string_split<std::string>(url, /*separator*/ ',');
+        if (parts.size() != 2) {
+            throw std::invalid_argument("Invalid url value");
+        } else if (!string_starts_with(parts[0], "data:image/")) {
+            throw std::invalid_argument("Invalid url format: " + parts[0]);
+        } else if (!string_ends_with(parts[0], "base64")) {
+            throw std::invalid_argument("url must be base64 encoded");
+        } else {
+            auto decoded_data = base64_decode(parts[1]);
+            out_files.push_back(std::move(decoded_data));
+        }
+    }
+}
+
  // used by /chat/completions endpoint
  json oaicompat_chat_params_parse(
      json & body, /* openai api json semantics */
@@ -860,41 +920,8 @@ json oaicompat_chat_params_parse(
                      throw std::runtime_error("image input is not supported - hint: if this is unexpected, you may need to provide the mmproj");
                  }
  
-                json image_url  = json_value(p, "image_url", json::object());
-                std::string url = json_value(image_url, "url", std::string());
-                if (string_starts_with(url, "http")) {
-                    // download remote image
-                    // TODO @ngxson : maybe make these params configurable
-                    common_remote_params params;
-                    params.headers.push_back("User-Agent: llama.cpp/" + build_info);
-                    params.max_size = 1024 * 1024 * 10; // 10MB
-                    params.timeout  = 10; // seconds
-                    SRV_INF("downloading image from '%s'\n", url.c_str());
-                    auto res = common_remote_get_content(url, params);
-                    if (200 <= res.first && res.first < 300) {
-                        SRV_INF("downloaded %ld bytes\n", res.second.size());
-                        raw_buffer data;
-                        data.insert(data.end(), res.second.begin(), res.second.end());
-                        out_files.push_back(data);
-                    } else {
-                        throw std::runtime_error("Failed to download image");
-                    }
-
-                } else {
-                    // try to decode base64 image
-                    std::vector<std::string> parts = string_split<std::string>(url, /*separator*/ ',');
-                    if (parts.size() != 2) {
-                        throw std::invalid_argument("Invalid image_url.url value");
-                    } else if (!string_starts_with(parts[0], "data:image/")) {
-                        throw std::invalid_argument("Invalid image_url.url format: " + parts[0]);
-                    } else if (!string_ends_with(parts[0], "base64")) {
-                        throw std::invalid_argument("image_url.url must be base64 encoded");
-                    } else {
-                        auto base64_data = parts[1];
-                        auto decoded_data = base64_decode(base64_data);
-                        out_files.push_back(decoded_data);
-                    }
-                }
+                json image_url = json_value(p, "image_url", json::object());
+                handle_media(out_files, image_url, opt.media_path);
  
                  // replace this chunk with a marker
                  p["type"] = "text";
@@ -916,6 +943,8 @@ json oaicompat_chat_params_parse(
                  auto decoded_data = base64_decode(data); // expected to be base64 encoded
                  out_files.push_back(decoded_data);
  
+                // TODO: add audio_url support by reusing handle_media()
+
                  // replace this chunk with a marker
                  p["type"] = "text";
                  p["text"] = mtmd_default_marker();
diff --git a/tools/server/server-common.h b/tools/server/server-common.h

index 51ae9ea8a96202941e54d6f7b4af58173fc7ae62..bb04e82b4f5fdfaec836dc4f1c51d26096a72220 100644 (file)
--- a/tools/server/server-common.h
+++ b/tools/server/server-common.h
@@ -284,6 +284,7 @@ struct oaicompat_parser_options {
      bool allow_image;
      bool allow_audio;
      bool enable_thinking = true;
+    std::string media_path;
  };
  
  // used by /chat/completions endpoint
diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp

index aac1a70bb2b2bfcf89be4671c2c7864cdba92659..c9245745756a2176b8d61ced4f3e4b1ffd2ed5d2 100644 (file)
--- a/tools/server/server-context.cpp
+++ b/tools/server/server-context.cpp
@@ -788,6 +788,7 @@ struct server_context_impl {
              /* allow_image           */ mctx ? mtmd_support_vision(mctx) : false,
              /* allow_audio           */ mctx ? mtmd_support_audio (mctx) : false,
              /* enable_thinking       */ enable_thinking,
+            /* media_path            */ params_base.media_path,
          };
  
          // print sample chat example to make it clear which template is used
diff --git a/tools/server/server.cpp b/tools/server/server.cpp

index 16e52a1a0cdffea4e713f62b779d90b68e3a75fd..d5bef3df44597e656644bbb356c0c089eeb84937 100644 (file)
--- a/tools/server/server.cpp
+++ b/tools/server/server.cpp
@@ -38,9 +38,11 @@ static server_http_context::handler_t ex_wrapper(server_http_context::handler_t
          try {
              return func(req);
          } catch (const std::invalid_argument & e) {
+            // treat invalid_argument as invalid request (400)
              error = ERROR_TYPE_INVALID_REQUEST;
              message = e.what();
          } catch (const std::exception & e) {
+            // treat other exceptions as server error (500)
              error = ERROR_TYPE_SERVER;
              message = e.what();
          } catch (...) {
diff --git a/tools/server/tests/unit/test_security.py b/tools/server/tests/unit/test_security.py

index e160a8e6d30eb8a0a90ae4b8c08b4dc214dae07a..8c38b89d535e38b9d8a4e2ce26ba2bf248ce1aee 100644 (file)
--- a/tools/server/tests/unit/test_security.py
+++ b/tools/server/tests/unit/test_security.py
@@ -94,3 +94,34 @@ def test_cors_options(origin: str, cors_header: str, cors_header_value: str):
      assert res.status_code == 200
      assert cors_header in res.headers
      assert res.headers[cors_header] == cors_header_value
+
+
+@pytest.mark.parametrize(
+    "media_path, image_url, success",
+    [
+        (None,             "file://mtmd/test-1.jpeg",    False), # disabled media path, should fail
+        ("../../../tools", "file://mtmd/test-1.jpeg",    True),
+        ("../../../tools", "file:////mtmd//test-1.jpeg", True),  # should be the same file as above
+        ("../../../tools", "file://mtmd/notfound.jpeg",  False), # non-existent file
+        ("../../../tools", "file://../mtmd/test-1.jpeg", False), # no directory traversal
+    ]
+)
+def test_local_media_file(media_path, image_url, success,):
+    # Sends one chat request whose image is given as a file:// URL and checks the
+    # HTTP status: 200 when the case is expected to succeed, 400 otherwise.
+    server = ServerPreset.tinygemma3()
+    server.media_path = media_path
+    server.start()
+
+    user_content = [
+        {"type": "text", "text": "test"},
+        {"type": "image_url", "image_url": {
+            "url": image_url,
+        }},
+    ]
+    payload = {
+        "max_tokens": 1,
+        "messages": [
+            {"role": "user", "content": user_content},
+        ],
+    }
+    response = server.make_request("POST", "/chat/completions", data=payload)
+
+    expected_status = 200 if success else 400
+    assert response.status_code == expected_status
diff --git a/tools/server/tests/utils.py b/tools/server/tests/utils.py

index afe4f77d9787bdfcc2e5e73a330853170aec2ba5..dfd2c8a260a764e6bd593ad5c0d6247f86ba3d8a 100644 (file)
--- a/tools/server/tests/utils.py
+++ b/tools/server/tests/utils.py
@@ -95,6 +95,7 @@ class ServerProcess:
      chat_template_file: str | None = None
      server_path: str | None = None
      mmproj_url: str | None = None
+    media_path: str | None = None
  
      # session variables
      process: subprocess.Popen | None = None
@@ -217,6 +218,8 @@ class ServerProcess:
              server_args.extend(["--chat-template-file", self.chat_template_file])
          if self.mmproj_url:
              server_args.extend(["--mmproj-url", self.mmproj_url])
+        if self.media_path:
+            server_args.extend(["--media-path", self.media_path])
  
          args = [str(arg) for arg in [server_path, *server_args]]
          print(f"tests: starting server with: {' '.join(args)}")
author	Xuan-Son Nguyen <redacted>
	Tue, 2 Dec 2025 21:49:20 +0000 (22:49 +0100)
committer	GitHub <redacted>
	Tue, 2 Dec 2025 21:49:20 +0000 (22:49 +0100)
common/arg.cpp		patch \| blob \| history
common/common.cpp		patch \| blob \| history
common/common.h		patch \| blob \| history
tools/server/server-common.cpp		patch \| blob \| history
tools/server/server-common.h		patch \| blob \| history
tools/server/server-context.cpp		patch \| blob \| history
tools/server/server.cpp		patch \| blob \| history
tools/server/tests/unit/test_security.py		patch \| blob \| history
tools/server/tests/utils.py		patch \| blob \| history