mtmd : fix Pixtral OOM with large images by capping image_size to 1024 (#14326)
author yuiseki <redacted>
Sun, 22 Jun 2025 12:44:57 +0000 (21:44 +0900)
committer GitHub <redacted>
Sun, 22 Jun 2025 12:44:57 +0000 (14:44 +0200)
Mistral Small 2506 models using the Pixtral vision encoder were running out
of GPU memory when processing images larger than 1024x1024 pixels, because
attention memory grows quadratically with the patch count when the image
size is left unbounded.

This fix applies the same 1024x1024 limit used by Qwen2VL models to
prevent OOM issues while maintaining compatibility with existing models.
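
For scale (an illustrative sketch, not llama.cpp code; the 16-pixel patch
size and a single fp16 attention score matrix per layer are assumptions),
the score matrix is n_patches x n_patches, so its footprint grows with the
fourth power of the image edge:

    // Illustrative only: estimate one fp16 attention score matrix for a
    // ViT-style encoder, assuming a 16-pixel patch size.
    #include <cstdint>
    #include <cstdio>

    int main() {
        const int patch_size = 16;                      // assumed patch size
        const int edges[]    = {512, 1024, 2048, 4096}; // image edge in pixels
        for (int edge : edges) {
            const int64_t n_patches = (int64_t)(edge / patch_size) * (edge / patch_size);
            const double  attn_mib  = (double)n_patches * n_patches * 2.0 / (1024.0 * 1024.0);
            printf("%4dpx -> %6lld patches, ~%8.1f MiB per attention matrix\n",
                   edge, (long long)n_patches, attn_mib);
        }
        return 0;
    }

At 1024px this is roughly 32 MiB per matrix; at 4096px it is already around
8 GiB, which is consistent with the reported OOM.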

tools/mtmd/clip.cpp

index 30283d6f1f032e70a2fe3a4797fedc0566ae6c5b..a990520ed3fbbc3c006a8d9107d3655b8419f0ac 100644
@@ -2211,6 +2211,9 @@ struct clip_model_loader {
                     {
                         hparams.rope_theta = 10000.0f;
                         hparams.warmup_image_size = hparams.patch_size * 8;
+                        // Mistral Small 2506 needs 1024x1024 image size cap to prevent OOM
+                        // ref: https://github.com/ggml-org/llama.cpp/issues/14310
+                        hparams.image_size = 1024;
                         get_u32(KEY_SPATIAL_MERGE_SIZE, hparams.spatial_merge_size, false);
                     } break;
                 case PROJECTOR_TYPE_GEMMA3:
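
With image_size capped at 1024, a typical preprocessing path scales an
oversized input down before patching instead of rejecting it. A hedged
sketch of that idea (not clip.cpp code; the fit_to_cap helper and the
4032x3024 example are hypothetical): shrink the longest edge to the cap
while preserving aspect ratio, and pass smaller images through unchanged.

    #include <algorithm>
    #include <cstdio>

    struct extent { int width; int height; };

    // Hypothetical helper: clamp the longest edge to `cap`
    // (e.g. hparams.image_size), keeping the aspect ratio.
    static extent fit_to_cap(extent in, int cap) {
        const int longest = std::max(in.width, in.height);
        if (longest <= cap) {
            return in;                              // already within the cap
        }
        const double scale = (double)cap / (double)longest;
        return { std::max(1, (int)(in.width  * scale + 0.5)),
                 std::max(1, (int)(in.height * scale + 0.5)) };
    }

    int main() {
        const extent out = fit_to_cap({4032, 3024}, 1024);
        printf("4032x3024 -> %dx%d\n", out.width, out.height); // 1024x768
        return 0;
    }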