//GLMV-Edge projection
struct ggml_tensor * mm_model_adapter_conv_w = nullptr;
struct ggml_tensor * mm_model_adapter_conv_b = nullptr;
- struct ggml_tensor * boi_w = nullptr;
- struct ggml_tensor * eoi_w = nullptr;
// MobileVLM projection
struct ggml_tensor * mm_model_mlp_1_w = nullptr;
vision_model.mm_model_mlp_1_w = get_tensor(string_format(TN_GLM_ADAPTER_D_H_2_4H,"weight"));
vision_model.mm_model_mlp_2_w = get_tensor(string_format(TN_GLM_ADAPTER_GATE,"weight"));
vision_model.mm_model_mlp_3_w = get_tensor(string_format(TN_GLM_ADAPTER_D_4H_2_H,"weight"));
- vision_model.boi_w = get_tensor(TN_GLM_BOI_W);
- vision_model.eoi_w = get_tensor(TN_GLM_EOI_W);
} break;
case PROJECTOR_TYPE_MERGER:
{
}
// Returns a byte count computed from the context:
//   clip_n_patches(ctx) * clip_n_mmproj_embd(ctx) * sizeof(float).
// The removed (-) lines added 2 to the patch count when ctx->has_glm_projector
// was set; the added (+) line drops that adjustment, so the result no longer
// depends on the projector flag.
// NOTE(review): the 2 extra tokens presumably reserved room for the boi/eoi
// embeddings that the encode path used to copy around the image embeddings —
// confirm that no caller still sizes its buffer expecting those two slots.
size_t clip_embd_nbytes(const struct clip_ctx * ctx) {
- int extra_tokens = ctx->has_glm_projector ? 2 : 0;
- return (clip_n_patches(ctx) + extra_tokens) * clip_n_mmproj_embd(ctx) * sizeof(float);
+ return clip_n_patches(ctx) * clip_n_mmproj_embd(ctx) * sizeof(float);
}
size_t clip_embd_nbytes_by_img(const struct clip_ctx * ctx, int img_h, int img_w) {
}
if (ctx->has_glm_projector) {
GGML_ASSERT(batch_size == 1);
- ggml_tensor * boi = ctx->vision_model.boi_w;
- ggml_backend_tensor_get(boi,vec,0,ggml_nbytes(boi));
- vec = (float*)(vec+ggml_nelements(boi)); //offset for boi
}
// build the inference graph
// copy the embeddings to the location passed by the user
ggml_backend_tensor_get(embeddings, vec, 0, ggml_nbytes(embeddings));
- if (ctx->has_glm_projector) {
- //eoi
- ggml_tensor * eoi = ctx->vision_model.eoi_w;
- int offset = ggml_nelements(embeddings);
- ggml_backend_tensor_get(eoi, vec+offset, 0, ggml_nbytes(eoi));
- }
-
return true;
}
marker_modified = "<start_of_image>" + ctx->image_marker + "<end_of_image>";
string_replace_all(prompt_modified, ctx->image_marker, marker_modified);
+ } else if (proj_type == PROJECTOR_TYPE_GLM_EDGE) {
+ // <|begin_of_image|> ... (image embeddings) ... <|end_of_image|>
+ marker_modified = "<|begin_of_image|>" + ctx->image_marker + "<|end_of_image|>";
+ string_replace_all(prompt_modified, ctx->image_marker, marker_modified);
+
} else if (proj_type == PROJECTOR_TYPE_IDEFICS3) {
// https://github.com/huggingface/transformers/blob/a42ba80fa520c784c8f11a973ca9034e5f859b79/src/transformers/models/idefics3/processing_idefics3.py#L192-L215
marker_modified = "<fake_token_around_image><global-img>" + ctx->image_marker + "<fake_token_around_image>";