From: Daniel Bevenius
Date: Tue, 23 Dec 2025 06:27:37 +0000 (+0100)
Subject: model-conversion : add trust_remote_code for embedding scripts (#18288)
X-Git-Tag: upstream/0.0.7599~81
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=847c35f7d5cbc686dfd5ae70f5702bafb84a8be9;p=pkg%2Fggml%2Fsources%2Fllama.cpp

model-conversion : add trust_remote_code for embedding scripts (#18288)

This commit adds the trust_remote_code=True parameter when loading
models and configurations in the embedding model conversion scripts. It
also adds a cast to float for models that might use a data type that is
not supported by NumPy, for example bfloat16.

The motivation for this is that some models require custom code to be
executed during loading, and setting trust_remote_code to True avoids
being prompted for confirmation.

Future work will consolidate the embedding conversion scripts with the
causal conversion scripts to avoid code duplication, but in the
meantime it is useful to have this fix in place.
---
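For illustration, the loading pattern these scripts end up with looks
roughly like the sketch below. This is a minimal sketch, not part of the
patch; the model id is hypothetical and stands in for any repository
that ships custom modeling code.

    from transformers import AutoConfig, AutoModel, AutoTokenizer

    model_path = "org/custom-embedding-model"  # hypothetical model id

    tokenizer = AutoTokenizer.from_pretrained(model_path)
    # Without trust_remote_code=True, transformers stops and asks for
    # confirmation before executing the custom code shipped with the
    # model, which blocks non-interactive conversion runs.
    config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
    model = AutoModel.from_pretrained(model_path, config=config,
                                      trust_remote_code=True)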
print("Loading model and tokenizer using AutoTokenizer:", args.model_path) tokenizer = AutoTokenizer.from_pretrained(args.model_path) - config = AutoConfig.from_pretrained(args.model_path) + config = AutoConfig.from_pretrained(args.model_path, trust_remote_code=True) if unreleased_model_name: model_name_lower = unreleased_model_name.lower() @@ -186,9 +186,9 @@ def main(): exit(1) else: if args.causal: - model = AutoModelForCausalLM.from_pretrained(args.model_path) + model = AutoModelForCausalLM.from_pretrained(args.model_path, trust_remote_code=True) else: - model = AutoModel.from_pretrained(args.model_path) + model = AutoModel.from_pretrained(args.model_path, trust_remote_code=True) encoded = tokenizer(prompt, return_tensors="pt") tokens = tokenizer.convert_ids_to_tokens(encoded['input_ids'][0])