model-conversion : add trust_remote_code for embedding scripts (#18288)
author Daniel Bevenius <redacted>
Tue, 23 Dec 2025 06:27:37 +0000 (07:27 +0100)
committer GitHub <redacted>
Tue, 23 Dec 2025 06:27:37 +0000 (07:27 +0100)
This commit adds the trust_remote_code=True parameter when loading
models and configurations in the embedding model conversion scripts.
It also adds a cast to float for models that use a data type that
NumPy does not support, for example bfloat16.
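
As a minimal illustration (a standalone sketch, not code from these scripts):
NumPy has no bfloat16 dtype, so calling .numpy() on a bfloat16 tensor raises a
TypeError unless the tensor is cast to float32 first.

    import torch

    hidden = torch.randn(4, 8, dtype=torch.bfloat16)
    # hidden.cpu().numpy() would raise:
    #   TypeError: Got unsupported ScalarType BFloat16
    embeddings = hidden.float().cpu().numpy()  # cast to float32, then convert
    print(embeddings.dtype)  # float32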

The motivation for this is that some models may require custom code to
be executed during loading, and setting trust_remote_code to True avoids
getting prompted for confirmation.
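
For reference, a minimal sketch of the loading pattern (the model id here is
hypothetical): passing trust_remote_code=True tells transformers to execute the
custom modeling code shipped in the model repository without stopping for the
interactive confirmation prompt.

    from transformers import AutoConfig, AutoModel

    model_path = "example/custom-embedding-model"  # hypothetical model id
    config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
    model = AutoModel.from_pretrained(model_path, config=config, trust_remote_code=True)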

Future work will consolidate the embedding conversion scripts with the
causal conversion scripts to avoid code duplication. But in the meantime
it would be nice to have this fix in place.

examples/model-conversion/scripts/embedding/run-original-model.py
examples/model-conversion/scripts/utils/semantic_check.py

index 640e200a97dc38157cc0b1f23b516fe6dfefd5f2..39f054d0e023d7688c3b6566c43046cd7c9b3dde 100755 (executable)
@@ -45,7 +45,7 @@ if use_sentence_transformers:
 else:
     tokenizer = AutoTokenizer.from_pretrained(model_path)
 
-    config = AutoConfig.from_pretrained(model_path)
+    config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
 
     # This can be used to override the sliding window size for manual testing. This
     # can be useful to verify the sliding window attention mask in the original model
@@ -64,12 +64,12 @@ else:
 
         try:
             model_class = getattr(importlib.import_module(unreleased_module_path), class_name)
-            model = model_class.from_pretrained(model_path, config=config)
+            model = model_class.from_pretrained(model_path, config=config, trust_remote_code=True)
         except (ImportError, AttributeError) as e:
             print(f"Failed to import or load model: {e}")
             exit(1)
     else:
-        model = AutoModel.from_pretrained(model_path, config=config)
+        model = AutoModel.from_pretrained(model_path, config=config, trust_remote_code=True)
     print(f"Model class: {type(model)}")
     print(f"Model file: {type(model).__module__}")
 
@@ -123,7 +123,7 @@ with torch.no_grad():
         outputs = model(**encoded)
         hidden_states = outputs.last_hidden_state  # Shape: [batch_size, seq_len, hidden_size]
 
-        all_embeddings = hidden_states[0].cpu().numpy()  # Shape: [seq_len, hidden_size]
+        all_embeddings = hidden_states[0].float().cpu().numpy()  # Shape: [seq_len, hidden_size]
 
         print(f"Hidden states shape: {hidden_states.shape}")
         print(f"All embeddings shape: {all_embeddings.shape}")
index 2ac8b6b7b42cb0e42f60f3c74a4a3dd6714e947f..e64c0004974e63643a06f887c54811d6e25c2732 100644 (file)
@@ -166,7 +166,7 @@ def main():
     # Load the python model to get configuration information and also to load the tokenizer.
     print("Loading model and tokenizer using AutoTokenizer:", args.model_path)
     tokenizer = AutoTokenizer.from_pretrained(args.model_path)
-    config = AutoConfig.from_pretrained(args.model_path)
+    config = AutoConfig.from_pretrained(args.model_path, trust_remote_code=True)
 
     if unreleased_model_name:
         model_name_lower = unreleased_model_name.lower()
@@ -186,9 +186,9 @@ def main():
             exit(1)
     else:
         if args.causal:
-            model = AutoModelForCausalLM.from_pretrained(args.model_path)
+            model = AutoModelForCausalLM.from_pretrained(args.model_path, trust_remote_code=True)
         else:
-            model = AutoModel.from_pretrained(args.model_path)
+            model = AutoModel.from_pretrained(args.model_path, trust_remote_code=True)
 
     encoded = tokenizer(prompt, return_tensors="pt")
     tokens = tokenizer.convert_ids_to_tokens(encoded['input_ids'][0])