From: Xuan Son Nguyen
Date: Tue, 21 Jan 2025 13:07:12 +0000 (+0100)
Subject: export-lora : fix tok_embd tensor (#11330)
X-Git-Tag: upstream/0.0.4631~108
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=e28245f35f2faaf249dd352998b3693a8cc28c51;p=pkg%2Fggml%2Fsources%2Fllama.cpp

export-lora : fix tok_embd tensor (#11330)
---

diff --git a/examples/export-lora/export-lora.cpp b/examples/export-lora/export-lora.cpp
index 99063b5d..91238e4b 100644
--- a/examples/export-lora/export-lora.cpp
+++ b/examples/export-lora/export-lora.cpp
@@ -345,8 +345,18 @@ struct lora_merge_ctx {
             gf = ggml_new_graph(ctx0);
             struct ggml_tensor * cur = inp_base;
             for (size_t i = 0; i < adapters.size(); ++i) {
-                struct ggml_tensor * a_T = ggml_cont(ctx0, ggml_transpose(ctx0, ggml_cast(ctx0, inp_a[i], GGML_TYPE_F32)));
-                struct ggml_tensor * delta = ggml_mul_mat(ctx0, a_T, ggml_cast(ctx0, inp_b[i], GGML_TYPE_F32));
+                struct ggml_tensor * delta;
+                bool is_tok_embd = string_starts_with(name_base, "token_embd");
+                if (is_tok_embd) {
+                    printf("%s : detected token embeddings tensor\n", __func__);
+                    delta = ggml_mul_mat(ctx0,
+                        ggml_cast(ctx0, inp_b[i], GGML_TYPE_F32),
+                        ggml_cast(ctx0, inp_a[i], GGML_TYPE_F32));
+                } else {
+                    delta = ggml_mul_mat(ctx0,
+                        ggml_cont(ctx0, ggml_transpose(ctx0, ggml_cast(ctx0, inp_a[i], GGML_TYPE_F32))),
+                        ggml_cast(ctx0, inp_b[i], GGML_TYPE_F32));
+                }
                 // scale
                 const float alpha = adapters[i]->alpha;
                 const float rank = (float) inp_b[i]->ne[0];
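
Note (not part of the commit): the merge step computes the standard LoRA delta, delta = (alpha / rank) * B * A, before adding it to the base tensor; the hunk above only changes the operand order passed to ggml_mul_mat when the target tensor is token_embd, presumably so that the resulting delta matches that tensor's orientation. The standalone C++ sketch below is illustrative only, with hypothetical shapes and names; it shows the delta formula and its scaling, not the export-lora implementation.

    // Illustrative sketch: LoRA delta = (alpha / rank) * B x A on plain
    // row-major matrices. Hypothetical shapes; not the export-lora code.
    #include <cstdio>
    #include <vector>

    using mat = std::vector<std::vector<float>>;

    // B: n_out x rank, A: rank x n_in  ->  delta: n_out x n_in
    static mat lora_delta(const mat & B, const mat & A, float alpha) {
        const size_t n_out = B.size();
        const size_t rank  = A.size();
        const size_t n_in  = A[0].size();
        mat delta(n_out, std::vector<float>(n_in, 0.0f));
        for (size_t r = 0; r < n_out; ++r) {
            for (size_t c = 0; c < n_in; ++c) {
                float acc = 0.0f;
                for (size_t k = 0; k < rank; ++k) {
                    acc += B[r][k] * A[k][c];
                }
                delta[r][c] = acc * alpha / (float) rank; // LoRA scaling
            }
        }
        return delta;
    }

    int main() {
        // rank-1 toy example: B is 2x1, A is 1x3, so delta is 2x3
        mat B = {{1.0f}, {2.0f}};
        mat A = {{3.0f, 4.0f, 5.0f}};
        mat delta = lora_delta(B, A, /*alpha=*/1.0f);
        for (const auto & row : delta) {
            for (float v : row) {
                printf("%6.2f ", v);
            }
            printf("\n");
        }
        return 0;
    }

Swapping the two factors (together with the matching transposes) yields the transposed delta, which is why the merge must pick the operand order according to how the target tensor is laid out.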