} break;
case PROJECTOR_TYPE_QWEN2VL:
{
- // max image size = sqrt(max_pixels)
- // https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct/blob/main/preprocessor_config.json
- hparams.image_size = 3584;
+ // max image size = sqrt(max_pixels) = 3584
+ // ref: https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct/blob/main/preprocessor_config.json
+ // however, the model uses an unreasonable amount of memory for images larger than 1024, so we cap the size at 1024; otherwise it is unusable
+ // ref: https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct/discussions/10
+ hparams.image_size = 1024;
hparams.warmup_image_size = hparams.patch_size * 8;
} break;
case PROJECTOR_TYPE_QWEN25VL:
{
// max image size = sqrt(max_pixels)
// https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct/blob/main/preprocessor_config.json
- hparams.image_size = 3584;
+ // however, the model uses an unreasonable amount of memory for images larger than 1024, so we cap the size at 1024; otherwise it is unusable
+ // ref: https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct/discussions/10
+ hparams.image_size = 1024;
hparams.warmup_image_size = hparams.patch_size * 8;
get_u32(KEY_WIN_ATTN_PATTERN, hparams.n_wa_pattern);
} break;