ctx_vision = new clip_ctx(ctx_params);
loader.load_hparams(ctx_vision->model, CLIP_MODALITY_VISION);
loader.load_tensors(*ctx_vision);
- loader.warmup(*ctx_vision);
+ if (ctx_params.warmup) {
+ loader.warmup(*ctx_vision);
+ }
}
if (loader.has_audio) {
ctx_audio = new clip_ctx(ctx_params);
loader.load_hparams(ctx_audio->model, CLIP_MODALITY_AUDIO);
loader.load_tensors(*ctx_audio);
- loader.warmup(*ctx_audio);
+ if (ctx_params.warmup) {
+ loader.warmup(*ctx_audio);
+ }
}
} catch (const std::exception & e) {
enum clip_flash_attn_type flash_attn_type;
int image_min_tokens;
int image_max_tokens;
+ bool warmup;
};
struct clip_init_result {
mparams.print_timings = true;
mparams.n_threads = params.cpuparams.n_threads;
mparams.flash_attn_type = params.flash_attn_type;
+ mparams.warmup = params.warmup;
mparams.image_min_tokens = params.image_min_tokens;
mparams.image_max_tokens = params.image_max_tokens;
ctx_vision.reset(mtmd_init_from_file(clip_path, model, mparams));
/* image_marker */ MTMD_DEFAULT_IMAGE_MARKER,
/* media_marker */ mtmd_default_marker(),
/* flash_attn_type */ LLAMA_FLASH_ATTN_TYPE_AUTO,
+ /* warmup */ true,
/* image_min_tokens */ -1,
/* image_max_tokens */ -1,
};
/* flash_attn_type */ CLIP_FLASH_ATTN_TYPE_AUTO,
/* image_min_tokens */ ctx_params.image_min_tokens,
/* image_max_tokens */ ctx_params.image_max_tokens,
+ /* warmup */ ctx_params.warmup,
};
auto res = clip_init(mmproj_fname, ctx_clip_params);
const char * image_marker; // deprecated, use media_marker instead
const char * media_marker;
enum llama_flash_attn_type flash_attn_type;
+ bool warmup; // whether to run a warmup encode pass after initialization
// limit number of image tokens, only for vision models with dynamic resolution
int image_min_tokens; // minimum number of tokens for image input (default: read from metadata)
mparams.print_timings = false;
mparams.n_threads = params_base.cpuparams.n_threads;
mparams.flash_attn_type = params_base.flash_attn_type;
+ mparams.warmup = params_base.warmup;
mparams.image_min_tokens = params_base.image_min_tokens;
mparams.image_max_tokens = params_base.image_max_tokens;
mctx = mtmd_init_from_file(mmproj_path.c_str(), model, mparams);