From: welix
Date: Thu, 8 May 2025 13:03:53 +0000 (+0900)
Subject: mtmd : fix the calculation of n_tokens for smolvlm (#13381)
X-Git-Tag: upstream/0.0.5318~5
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=0ccc1213549e39ef4c1affb1bf5f49651ef4ce48;p=pkg%2Fggml%2Fsources%2Fllama.cpp

mtmd : fix the calculation of n_tokens for smolvlm (#13381)

Co-authored-by: Taichi Nishimura
---

diff --git a/tools/mtmd/clip.cpp b/tools/mtmd/clip.cpp
index 4432fb71..4e1a7328 100644
--- a/tools/mtmd/clip.cpp
+++ b/tools/mtmd/clip.cpp
@@ -3010,7 +3010,7 @@ int clip_n_output_tokens(const struct clip_ctx * ctx, struct clip_image_f32 * im
         int n_per_side_2d_pool = n_per_side / params.proj_scale_factor;
         n_patches = n_per_side_2d_pool * n_per_side_2d_pool;
     } else if (ctx->proj_type == PROJECTOR_TYPE_IDEFICS3) {
-        n_patches /= params.proj_scale_factor;
+        n_patches /= (params.proj_scale_factor * params.proj_scale_factor);
     } else if (ctx->proj_type == PROJECTOR_TYPE_PIXTRAL) {
         int n_merge = params.spatial_merge_size;
         int n_patches_x = img->nx / params.patch_size / (n_merge > 0 ? n_merge : 1);