From: Daniel Bevenius
Date: Tue, 23 Dec 2025 06:27:37 +0000 (+0100)
Subject: model-conversion : add trust_remote_code for embedding scripts (#18288)
X-Git-Tag: upstream/0.0.7599~81
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=847c35f7d5cbc686dfd5ae70f5702bafb84a8be9;p=pkg%2Fggml%2Fsources%2Fllama.cpp

model-conversion : add trust_remote_code for embedding scripts (#18288)

This commit adds the trust_remote_code=True parameter when loading
models and configurations in the embedding model conversion scripts. It
also adds a cast to float for models that might use a data type that is
not supported by NumPy, for example bfloat16.

The motivation for this is that some models require custom code to be
executed during loading, and setting trust_remote_code to True avoids
being prompted for confirmation.

Future work will consolidate the embedding conversion scripts with the
causal conversion scripts to avoid code duplication, but in the
meantime it is useful to have this fix in place.
---
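For illustration, the loading pattern these scripts end up with looks
roughly like the sketch below. This is a minimal sketch, not part of the
patch; the model id is hypothetical and stands in for any repository
that ships custom modeling code.

    from transformers import AutoConfig, AutoModel, AutoTokenizer

    model_path = "org/custom-embedding-model"  # hypothetical model id

    tokenizer = AutoTokenizer.from_pretrained(model_path)
    # Without trust_remote_code=True, transformers stops and asks for
    # confirmation before executing the custom code shipped with the
    # model, which blocks non-interactive conversion runs.
    config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
    model = AutoModel.from_pretrained(model_path, config=config,
                                      trust_remote_code=True)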
print("Loading model and tokenizer using AutoTokenizer:", args.model_path) tokenizer = AutoTokenizer.from_pretrained(args.model_path) - config = AutoConfig.from_pretrained(args.model_path) + config = AutoConfig.from_pretrained(args.model_path, trust_remote_code=True) if unreleased_model_name: model_name_lower = unreleased_model_name.lower() @@ -186,9 +186,9 @@ def main(): exit(1) else: if args.causal: - model = AutoModelForCausalLM.from_pretrained(args.model_path) + model = AutoModelForCausalLM.from_pretrained(args.model_path, trust_remote_code=True) else: - model = AutoModel.from_pretrained(args.model_path) + model = AutoModel.from_pretrained(args.model_path, trust_remote_code=True) encoded = tokenizer(prompt, return_tensors="pt") tokens = tokenizer.convert_ids_to_tokens(encoded['input_ids'][0])