hparams.set_warmup_n_tokens(16*16);
} break;
case PROJECTOR_TYPE_PIXTRAL:
- case PROJECTOR_TYPE_LIGHTONOCR:
{
// ref: https://huggingface.co/mistral-community/pixtral-12b/blob/main/preprocessor_config.json
// TODO: verify the image_min_tokens
hparams.set_limit_image_tokens(8, 1024);
hparams.set_warmup_n_tokens(256); // avoid OOM on warmup
} break;
+ case PROJECTOR_TYPE_LIGHTONOCR: // dedicated hparams setup for LightOnOCR
+ {
+ hparams.n_merge = 1; // default merge factor, set before the KEY_SPATIAL_MERGE_SIZE lookup below
+ hparams.rope_theta = 10000.0f;
+ get_u32(KEY_SPATIAL_MERGE_SIZE, hparams.n_merge, false); // NOTE(review): presumably `false` marks the key as optional so the default above survives when it is absent - confirm against get_u32
+ hparams.image_longest_edge = hparams.image_size; // default the longest-edge limit to image_size before the lookup below
+ get_u32(KEY_PREPROC_IMAGE_SIZE, hparams.image_longest_edge, false); // NOTE(review): presumably overrides the default only when the key is present - confirm against get_u32
+ hparams.set_warmup_n_tokens(256); // avoid OOM on warmup
+ } break;
case PROJECTOR_TYPE_KIMIVL:
{
hparams.rope_theta = 10000.0f;
case PROJECTOR_TYPE_PHI4:
case PROJECTOR_TYPE_PIXTRAL:
- case PROJECTOR_TYPE_LIGHTONOCR:
{
GGML_ASSERT(params.image_min_pixels > 0 && params.image_max_pixels > 0);
clip_image_u8 resized_image;
normalize_image_u8_to_f32(resized_image, *img_f32, params.image_mean, params.image_std);
res_imgs->entries.push_back(std::move(img_f32));
} break;
+ case PROJECTOR_TYPE_LIGHTONOCR: // resize bounded by image_longest_edge, then normalize and append one entry
+ {
+ GGML_ASSERT(params.image_longest_edge > 0); // the longest-edge bound is passed to the size calculation below, so it must be positive
+ clip_image_u8 resized_image;
+ const clip_image_size target_size = img_tool::calc_size_preserved_ratio(
+ original_size,
+ params.patch_size * params.n_merge, // NOTE(review): presumably the multiple the output dimensions are aligned to - confirm against calc_size_preserved_ratio
+ params.image_longest_edge);
+ img_tool::resize(*img, resized_image, target_size, img_tool::RESIZE_ALGO_BICUBIC); // writes the bicubic-resized copy of *img into resized_image
+ clip_image_f32_ptr img_f32(clip_image_f32_init());
+ normalize_image_u8_to_f32(resized_image, *img_f32, params.image_mean, params.image_std); // u8 -> f32 using params.image_mean and params.image_std
+ res_imgs->entries.push_back(std::move(img_f32)); // img_f32 is moved into res_imgs->entries
+ } break;
case PROJECTOR_TYPE_LLAMA4:
{