model : add support for JinaBertModel with non-gated ffn (#18475)
author     o7si <redacted>
           Thu, 1 Jan 2026 17:38:51 +0000 (01:38 +0800)
committer  GitHub <redacted>
           Thu, 1 Jan 2026 17:38:51 +0000 (18:38 +0100)
* WIP: Initial commit for fixing JinaBert original FF type support

* convert: add jina-v2-de tokenizer variant for German_Semantic_V3

* convert: fix token collision in BERT phantom vocab conversion

* convert: add feed_forward_type metadata

* model: add feed_forward_type metadata for jina-bert-v2

* model: jina-bert-v2: support standard GELU FFN variant

* model: remove ffn_type, detect FFN variant from tensor dimensions (see the sketch after this list)

* Update src/llama-model.cpp

Co-authored-by: Sigbjørn Skjæret <redacted>
* Update src/llama-model.cpp

Co-authored-by: Sigbjørn Skjæret <redacted>
* Update src/models/bert.cpp

Co-authored-by: Sigbjørn Skjæret <redacted>
* Update src/models/bert.cpp

Co-authored-by: Sigbjørn Skjæret <redacted>
* revert collision fix to be handled in separate PR

---------

Co-authored-by: Sigbjørn Skjæret <redacted>
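
The dimension-based rule referenced in the commit list above ("detect FFN variant from tensor dimensions") reduces to checking whether a gate tensor exists and how wide the up projection is. A minimal illustrative sketch, using a hypothetical ffn_variant enum and plain integers in place of the loader/graph code in the diffs below (this is not llama.cpp API):

    #include <cassert>
    #include <cstdint>

    // Hypothetical enum naming the three jina-bert-v2 FFN layouts handled by this commit.
    enum class ffn_variant {
        gated_gelu,   // separate ffn_gate tensor; ffn_up is {n_embd, n_ff}
        fused_geglu,  // no ffn_gate; ffn_up packs up+gate as {n_embd, 2*n_ff}
        plain_gelu,   // no ffn_gate; ffn_up is {n_embd, n_ff} (the new non-gated case)
    };

    // Decide the variant from what is actually stored in the GGUF: whether a gate
    // tensor exists, and the second dimension of the up-projection weight.
    static ffn_variant detect_ffn_variant(bool has_ffn_gate, int64_t n_ffn_up, int64_t n_ff) {
        if (has_ffn_gate) {
            return ffn_variant::gated_gelu;
        }
        assert(n_ffn_up == n_ff || n_ffn_up == 2*n_ff); // same invariant as the GGML_ASSERT below
        return n_ffn_up == 2*n_ff ? ffn_variant::fused_geglu : ffn_variant::plain_gelu;
    }

    int main() {
        // The new non-gated case (as in German_Semantic_V3): no gate tensor and an
        // up-projection exactly n_ff wide. Sizes here are placeholders.
        assert(detect_ffn_variant(/*has_ffn_gate=*/false, /*n_ffn_up=*/3072, /*n_ff=*/3072)
               == ffn_variant::plain_gelu);
        return 0;
    }
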
convert_hf_to_gguf.py
convert_hf_to_gguf_update.py
src/llama-model.cpp
src/models/bert.cpp

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 2c961b8f5961ca6537df9f789527c905f8e5d1ea..f26f4de6523157560aee4380c651c4d87ddcfd8d 100755 (executable)
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -1062,6 +1062,9 @@ class TextModel(ModelBase):
         if chkhsh == "66b8d4e19ab16c3bfd89bce5d785fb7e0155e8648708a1f42077cb9fe002c273":
             # ref: https://huggingface.co/alvarobartt/grok-2-tokenizer
             res = "grok-2"
+        if chkhsh == "b3d1dd861f1d4c5c0d2569ce36baf3f90fe8a102db3de50dd71ff860d91be3df":
+            # ref: https://huggingface.co/aari1995/German_Semantic_V3
+            res = "jina-v2-de"
         if chkhsh == "0ef9807a4087ebef797fc749390439009c3b9eda9ad1a097abbe738f486c01e5":
             # ref: https://huggingface.co/meta-llama/Meta-Llama-3-8B
             res = "llama-bpe"
diff --git a/convert_hf_to_gguf_update.py b/convert_hf_to_gguf_update.py
index b1ae4105ed4c15d693b1bb89c421c8927b80cab7..243cf8a29b8fa092c47a96c0df3d8928b5c91caf 100755 (executable)
--- a/convert_hf_to_gguf_update.py
+++ b/convert_hf_to_gguf_update.py
@@ -166,6 +166,8 @@ pre_computed_hashes = [
     {"name": "kimi-k2",   "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/moonshotai/Kimi-K2-Base",   "chkhsh": "81212dc7cdb7e0c1074ca62c5aeab0d43c9f52b8a737be7b12a777c953027890"},
     {"name": "qwen2",     "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Qwen/Qwen3-Embedding-0.6B", "chkhsh": "d4540891389ea895b53b399da6ac824becc30f2fba0e9ddbb98f92e55ca0e97c"},
     {"name": "grok-2",    "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/alvarobartt/grok-2-tokenizer", "chkhsh": "66b8d4e19ab16c3bfd89bce5d785fb7e0155e8648708a1f42077cb9fe002c273"},
+    # jina-v2-de variants
+    {"name": "jina-v2-de", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/aari1995/German_Semantic_V3", "chkhsh": "b3d1dd861f1d4c5c0d2569ce36baf3f90fe8a102db3de50dd71ff860d91be3df"},
 ]
 
 
diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index dfb5c0ce82b10ec2bdc258ef127cc36b44657146..d8b1221df5ac94b0219c9162b8e40bdbf07afef4 100644 (file)
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -3322,7 +3322,14 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                         layer.attn_norm_2_b = create_tensor(tn(LLM_TENSOR_ATTN_NORM_2, "bias",   i), {n_embd}, TENSOR_NOT_REQUIRED);
 
                         layer.ffn_gate = create_tensor(tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff}, TENSOR_NOT_REQUIRED);
-                        layer.ffn_up   = create_tensor(tn(LLM_TENSOR_FFN_UP,   "weight", i), {n_embd, layer.ffn_gate ? n_ff : n_ff * 2}, 0);
+
+                        const auto tn_ffn_up_weight = tn(LLM_TENSOR_FFN_UP, "weight", i);
+                        ggml_tensor * t_ffn_up = ml.get_tensor_meta(tn_ffn_up_weight.str().c_str());
+                        const int64_t n_ffn_up = t_ffn_up ? t_ffn_up->ne[1] : n_ff;
+
+                        GGML_ASSERT(n_ffn_up == n_ff || n_ffn_up == n_ff * 2);
+                        layer.ffn_up   = create_tensor(tn_ffn_up_weight, {n_embd, n_ffn_up}, 0);
+                        layer.ffn_up_b = create_tensor(tn(LLM_TENSOR_FFN_UP, "bias", i), {n_ffn_up}, TENSOR_NOT_REQUIRED);
 
                         layer.ffn_down   = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "weight", i), {n_ff, n_embd}, 0);
                         layer.ffn_down_b = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "bias",   i), {n_embd}, 0);
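
To see which layout a converted GGUF actually contains without going through the loader, the same shape check can be done against ggml's public gguf/ggml headers. A standalone sketch, not part of this change; the key and tensor names follow llama.cpp's GGUF conventions for this arch, and error handling is kept minimal:

    // Build against a ggml/llama.cpp checkout (include path and libraries depend on your build).
    #include "ggml.h"
    #include "gguf.h"

    #include <cinttypes>
    #include <cstdio>

    int main(int argc, char ** argv) {
        if (argc < 2) {
            fprintf(stderr, "usage: %s model.gguf\n", argv[0]);
            return 1;
        }

        // Load tensor metadata (names and shapes) only, not the weights themselves.
        ggml_context * ctx_meta = nullptr;
        gguf_init_params params = { /*no_alloc =*/ true, /*ctx =*/ &ctx_meta };
        gguf_context * ctx_gguf = gguf_init_from_file(argv[1], params);
        if (!ctx_gguf) {
            fprintf(stderr, "failed to read %s\n", argv[1]);
            return 1;
        }

        // n_ff as written by the converter (u32 key; name assumes arch == jina-bert-v2).
        const auto kid = gguf_find_key(ctx_gguf, "jina-bert-v2.feed_forward_length");
        const int64_t n_ff = kid >= 0 ? (int64_t) gguf_get_val_u32(ctx_gguf, kid) : -1;

        // The first layer is representative: is there a gate tensor, and how wide is ffn_up?
        ggml_tensor * up   = ggml_get_tensor(ctx_meta, "blk.0.ffn_up.weight");
        ggml_tensor * gate = ggml_get_tensor(ctx_meta, "blk.0.ffn_gate.weight");

        if (up && n_ff > 0) {
            const int64_t n_ffn_up = up->ne[1];
            printf("n_ff = %" PRId64 ", ffn_up width = %" PRId64 ", ffn_gate %s\n",
                   n_ff, n_ffn_up, gate ? "present" : "absent");
            if      (gate)                printf("-> gated GELU (separate gate tensor)\n");
            else if (n_ffn_up == 2*n_ff)  printf("-> GEGLU (up and gate fused in one tensor)\n");
            else                          printf("-> plain GELU (the non-gated case added here)\n");
        }

        ggml_free(ctx_meta);
        gguf_free(ctx_gguf);
        return 0;
    }
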
diff --git a/src/models/bert.cpp b/src/models/bert.cpp
index 3274fa3b99dd1a2a042b9a102f5597fe77b61d4b..bca0e254fc51bc875abe8a65c224b37b6d89476e 100644 (file)
--- a/src/models/bert.cpp
+++ b/src/models/bert.cpp
@@ -142,11 +142,13 @@ llm_build_bert::llm_build_bert(const llama_model & model, const llm_graph_params
                     LLM_FFN_GELU, LLM_FFN_SEQ, il);
             cb(cur, "ffn_out", il);
         } else if (model.arch == LLM_ARCH_JINA_BERT_V2) {
+            const bool up_contains_gate = !model.layers[il].ffn_gate && model.layers[il].ffn_up->ne[1] != hparams.n_ff();
+            auto type_op = up_contains_gate ? LLM_FFN_GEGLU : LLM_FFN_GELU;
             cur = build_ffn(cur,
-                    model.layers[il].ffn_up, NULL, NULL,
+                    model.layers[il].ffn_up, model.layers[il].ffn_up_b, NULL,
                     model.layers[il].ffn_gate, NULL, NULL,
                     model.layers[il].ffn_down, model.layers[il].ffn_down_b, NULL, NULL,
-                    model.layers[il].ffn_gate ? LLM_FFN_GELU : LLM_FFN_GEGLU, LLM_FFN_PAR, il);
+                    type_op, LLM_FFN_PAR, il);
             cb(cur, "ffn_out", il);
         } else {
             cur = build_ffn(cur,
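
Taken together, the loader and graph changes let jina-bert-v2 handle three FFN layouts: a separate ffn_gate tensor (gated GELU via LLM_FFN_PAR, as before), a fused up+gate tensor of width 2*n_ff (LLM_FFN_GEGLU, as before), and the new non-gated case where ffn_up is exactly n_ff wide and plain LLM_FFN_GELU is used, with the optional ffn_up bias now passed to build_ffn as well. Deriving the variant from tensor shapes avoids the feed_forward_type metadata key that earlier commits in this PR introduced and then removed.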