git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
mtmd : fix the calculation of n_tokens for smolvlm (#13381)
author welix <redacted>
Thu, 8 May 2025 13:03:53 +0000 (22:03 +0900)
committer GitHub <redacted>
Thu, 8 May 2025 13:03:53 +0000 (15:03 +0200)
Co-authored-by: Taichi Nishimura <redacted>
tools/mtmd/clip.cpp

index 4432fb7193d7de349b373888e33b15722a11a6e1..4e1a7328725eac854faf01a7a8e8d64d76b64f31 100644 (file)
@@ -3010,7 +3010,7 @@ int clip_n_output_tokens(const struct clip_ctx * ctx, struct clip_image_f32 * im
         int n_per_side_2d_pool = n_per_side / params.proj_scale_factor;
         n_patches = n_per_side_2d_pool * n_per_side_2d_pool;
     } else if (ctx->proj_type == PROJECTOR_TYPE_IDEFICS3) {
-        n_patches /= params.proj_scale_factor;
+        n_patches /= (params.proj_scale_factor * params.proj_scale_factor);
     } else if (ctx->proj_type == PROJECTOR_TYPE_PIXTRAL) {
         int n_merge = params.spatial_merge_size;
         int n_patches_x = img->nx / params.patch_size / (n_merge > 0 ? n_merge : 1);