params.no_mmproj = true;
}
).set_examples(mmproj_examples));
+ add_opt(common_arg(
+ {"--no-mmproj-offload"},
+ "do not offload multimodal projector to GPU",
+ [](common_params & params) {
+ params.mmproj_use_gpu = false;
+ }
+ ).set_examples(mmproj_examples));
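With the option registered above, a typical invocation that keeps the projector on the CPU might look like the following sketch (the binary name and file paths are illustrative; only --no-mmproj-offload comes from this change):

    llama-mtmd-cli -m model.gguf --mmproj mmproj.gguf --image test.jpg -p "describe this image" --no-mmproj-offload

The flag only affects the projector; offload of the main model is still controlled separately (e.g. with -ngl).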
add_opt(common_arg(
{"--image"}, "FILE",
"path to an image file. use with multimodal models. Specify multiple times for batching",
// multimodal models (see examples/llava)
struct common_params_model mmproj;
+ bool mmproj_use_gpu = true; // offload multimodal projector to GPU
bool no_mmproj = false; // explicitly disable multimodal model
std::vector<std::string> image; // path to image file(s)
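Note the defaults above: mmproj_use_gpu starts out true, so offloading the projector remains the default behavior and the new flag is a pure opt-out, mirroring how no_mmproj (default false) opts out of loading the projector at all.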
"Usage: %s [options] -m <model> --mmproj <mmproj> --image <image> -p <prompt>\n\n"
" -m and --mmproj are required\n"
" -hf user/repo can replace both -m and --mmproj in most cases\n"
- " --image and -p are optional, if NOT provided, the CLI will run in chat mode\n",
+ " --image and -p are optional, if NOT provided, the CLI will run in chat mode\n"
+ " to disable using GPU for mmproj model, add --no-mmproj-offload\n",
argv[0]
);
}
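For context on the designated-comment initializer in init_vision_context below: the four values fill an mtmd_context_params aggregate positionally, so a minimal sketch of the struct would look like this (field names are assumptions read off the /* ... */ comments, not quoted from mtmd.h):

    struct mtmd_context_params {
        bool use_gpu;                  // offload the projector to GPU; now driven by params.mmproj_use_gpu
        bool timings;                  // print timing information
        int  n_threads;                // CPU threads used by the projector
        enum ggml_log_level verbosity; // log level for the vision context
    };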
void init_vision_context(common_params & params) {
const char * clip_path = params.mmproj.path.c_str();
ctx_vision.reset(mtmd_init_from_file(clip_path, model, mtmd_context_params{
- /* use_gpu */ true,
+ /* use_gpu */ params.mmproj_use_gpu,
/* timings */ true,
/* n_threads */ params.cpuparams.n_threads,
- /* verbosity */ GGML_LOG_LEVEL_INFO,
+ /* verbosity */ params.verbosity > 0 ? GGML_LOG_LEVEL_DEBUG : GGML_LOG_LEVEL_INFO,
}));
if (!ctx_vision.get()) {
LOG_ERR("Failed to load vision model from %s\n", clip_path);