model-conversion : add trust_remote_code for embedding scripts (#18288)
author Daniel Bevenius <redacted>
Tue, 23 Dec 2025 06:27:37 +0000 (07:27 +0100)
committer GitHub <redacted>
Tue, 23 Dec 2025 06:27:37 +0000 (07:27 +0100)
This commit adds the trust_remote_code=True parameter when loading
models and configurations in the embedding model conversion scripts.
It also adds a cast to float for models that use a data type that
NumPy does not support, for example bfloat16.
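
As a minimal illustration (a standalone sketch, not code from these scripts):
NumPy has no bfloat16 dtype, so calling .numpy() on a bfloat16 tensor raises a
TypeError unless the tensor is cast to float32 first.

    import torch

    hidden = torch.randn(4, 8, dtype=torch.bfloat16)
    # hidden.cpu().numpy() would raise:
    #   TypeError: Got unsupported ScalarType BFloat16
    embeddings = hidden.float().cpu().numpy()  # cast to float32, then convert
    print(embeddings.dtype)  # float32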

The motivation for this is that some models may require custom code to
be executed during loading, and setting trust_remote_code to True avoids
getting prompted for confirmation.
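
For reference, a minimal sketch of the loading pattern (the model id here is
hypothetical): passing trust_remote_code=True tells transformers to execute the
custom modeling code shipped in the model repository without stopping for the
interactive confirmation prompt.

    from transformers import AutoConfig, AutoModel

    model_path = "example/custom-embedding-model"  # hypothetical model id
    config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
    model = AutoModel.from_pretrained(model_path, config=config, trust_remote_code=True)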

Future work will consolidate the embedding conversion scripts with the
causal conversion scripts to avoid code duplication. But in the meantime
it would be nice to have this fix in place.

examples/model-conversion/scripts/embedding/run-original-model.py
examples/model-conversion/scripts/utils/semantic_check.py

index 640e200a97dc38157cc0b1f23b516fe6dfefd5f2..39f054d0e023d7688c3b6566c43046cd7c9b3dde 100755 (executable)
@@ -45,7 +45,7 @@ if use_sentence_transformers:
 else:
     tokenizer = AutoTokenizer.from_pretrained(model_path)
 
-    config = AutoConfig.from_pretrained(model_path)
+    config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
 
     # This can be used to override the sliding window size for manual testing. This
     # can be useful to verify the sliding window attention mask in the original model
@@ -64,12 +64,12 @@ else:
 
         try:
             model_class = getattr(importlib.import_module(unreleased_module_path), class_name)
-            model = model_class.from_pretrained(model_path, config=config)
+            model = model_class.from_pretrained(model_path, config=config, trust_remote_code=True)
         except (ImportError, AttributeError) as e:
             print(f"Failed to import or load model: {e}")
             exit(1)
     else:
-        model = AutoModel.from_pretrained(model_path, config=config)
+        model = AutoModel.from_pretrained(model_path, config=config, trust_remote_code=True)
     print(f"Model class: {type(model)}")
     print(f"Model file: {type(model).__module__}")
 
@@ -123,7 +123,7 @@ with torch.no_grad():
         outputs = model(**encoded)
         hidden_states = outputs.last_hidden_state  # Shape: [batch_size, seq_len, hidden_size]
 
-        all_embeddings = hidden_states[0].cpu().numpy()  # Shape: [seq_len, hidden_size]
+        all_embeddings = hidden_states[0].float().cpu().numpy()  # Shape: [seq_len, hidden_size]
 
         print(f"Hidden states shape: {hidden_states.shape}")
         print(f"All embeddings shape: {all_embeddings.shape}")
index 2ac8b6b7b42cb0e42f60f3c74a4a3dd6714e947f..e64c0004974e63643a06f887c54811d6e25c2732 100644 (file)
@@ -166,7 +166,7 @@ def main():
     # Load the python model to get configuration information and also to load the tokenizer.
     print("Loading model and tokenizer using AutoTokenizer:", args.model_path)
     tokenizer = AutoTokenizer.from_pretrained(args.model_path)
-    config = AutoConfig.from_pretrained(args.model_path)
+    config = AutoConfig.from_pretrained(args.model_path, trust_remote_code=True)
 
     if unreleased_model_name:
         model_name_lower = unreleased_model_name.lower()
@@ -186,9 +186,9 @@ def main():
             exit(1)
     else:
         if args.causal:
-            model = AutoModelForCausalLM.from_pretrained(args.model_path)
+            model = AutoModelForCausalLM.from_pretrained(args.model_path, trust_remote_code=True)
         else:
-            model = AutoModel.from_pretrained(args.model_path)
+            model = AutoModel.from_pretrained(args.model_path, trust_remote_code=True)
 
     encoded = tokenizer(prompt, return_tensors="pt")
     tokens = tokenizer.convert_ids_to_tokens(encoded['input_ids'][0])