From: Daniel Bevenius Date: Thu, 25 Sep 2025 10:02:36 +0000 (+0200) Subject: model-conversion : add embedding prompt file support (#15871) X-Git-Tag: upstream/0.0.6641~59 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=aa3ee0eb0b80efca126cedf9bcb4fb5864b46ce3;p=pkg%2Fggml%2Fsources%2Fllama.cpp model-conversion : add embedding prompt file support (#15871) This commit adds support for passing a prompt file to the model conversion targets/scripts. It also updates the logits.cpp to print out embedding information in the same format as when running the original embedding model. The motivation for this is that it allows us to pass files of different sizes when running the converted models and validating the logits. This can be particularly important when testing the sliding window functionality of models where the sequence length needs to exceed a certain number of tokens to trigger the sliding window logic. --- diff --git a/examples/model-conversion/Makefile b/examples/model-conversion/Makefile index ac7a4147..f0867cfe 100644 --- a/examples/model-conversion/Makefile +++ b/examples/model-conversion/Makefile @@ -118,13 +118,17 @@ embedding-convert-model: embedding-run-original-model: $(call validate_embedding_model_path,embedding-run-original-model) - @EMBEDDING_MODEL_PATH="$(EMBEDDING_MODEL_PATH)" ./scripts/embedding/run-original-model.py + @EMBEDDING_MODEL_PATH="$(EMBEDDING_MODEL_PATH)" \ + ./scripts/embedding/run-original-model.py \ + $(if $(PROMPTS_FILE),--prompts-file "$(PROMPTS_FILE)") embedding-run-converted-model: - @CONVERTED_EMBEDDING_MODEL="$(CONVERTED_EMBEDDING_MODEL)" ./scripts/embedding/run-converted-model.sh ${CONVERTED_EMBEDDING_MODEL} + @./scripts/embedding/run-converted-model.sh $(CONVERTED_EMBEDDING_MODEL) \ + $(if $(PROMPTS_FILE),--prompts-file "$(PROMPTS_FILE)") embedding-verify-logits: embedding-run-original-model embedding-run-converted-model - @./scripts/embedding/compare-embeddings-logits.sh + @./scripts/embedding/compare-embeddings-logits.sh \ + $(if $(PROMPTS_FILE),--prompts-file "$(PROMPTS_FILE)") embedding-inspect-original-model: $(call validate_embedding_model_path,embedding-inspect-original-model) @@ -156,7 +160,8 @@ embedding-quantize-model: $(call quantize_model,$(CONVERTED_EMBEDDING_MODEL),QUANTIZED_EMBEDDING_MODEL) embedding-run-quantized-model: - @./scripts/embedding/run-converted-model.sh ${QUANTIZED_EMBEDDING_MODEL} + @./scripts/embedding/run-converted-model.sh $(QUANTIZED_EMBEDDING_MODEL) \ + $(if $(PROMPTS_FILE),--prompts-file "$(PROMPTS_FILE)") ### ### Perplexity targets/recipes diff --git a/examples/model-conversion/logits.cpp b/examples/model-conversion/logits.cpp index ddc5e900..6dc33418 100644 --- a/examples/model-conversion/logits.cpp +++ b/examples/model-conversion/logits.cpp @@ -151,6 +151,35 @@ int main(int argc, char ** argv) { logits = llama_get_embeddings(ctx); n_logits = llama_model_n_embd(model) * batch.n_tokens; type = "-embeddings"; + + const int n_embd = llama_model_n_embd(model); + const int n_embd_count = batch.n_tokens; + + printf("Embedding dimension: %d\n", n_embd); + printf("\n"); + + // Print embeddings in the specified format + for (int j = 0; j < n_embd_count; j++) { + printf("embedding %d: ", j); + + // Print first 3 values + for (int i = 0; i < 3 && i < n_embd; i++) { + printf("%9.6f ", logits[j * n_embd + i]); + } + + printf(" ... "); + + // Print last 3 values + for (int i = n_embd - 3; i < n_embd; i++) { + if (i >= 0) { + printf("%9.6f ", logits[j * n_embd + i]); + } + } + + printf("\n"); + } + printf("\n"); + printf("Embeddings size: %d\n", n_logits); } else { logits = llama_get_logits_ith(ctx, batch.n_tokens - 1); @@ -183,22 +212,23 @@ int main(int argc, char ** argv) { return 1; } for (int i = 0; i < n_logits; i++) { - fprintf(f, "%d: %.6f\n", i, logits[i]); // Added index and changed format + fprintf(f, "%d: %.6f\n", i, logits[i]); } fclose(f); - // Print first and last 10 logits for quick verification - printf("First 10 logits: "); - for (int i = 0; i < 10 && i < n_logits; i++) { - printf("%.6f ", logits[i]); - } - printf("\n"); + if (!embedding_mode) { + printf("First 10 logits: "); + for (int i = 0; i < 10 && i < n_logits; i++) { + printf("%.6f ", logits[i]); + } + printf("\n"); - printf("Last 10 logits: "); - for (int i = n_logits - 10; i < n_logits; i++) { - if (i >= 0) printf("%.6f ", logits[i]); + printf("Last 10 logits: "); + for (int i = n_logits - 10; i < n_logits; i++) { + if (i >= 0) printf("%.6f ", logits[i]); + } + printf("\n\n"); } - printf("\n\n"); printf("Logits saved to %s\n", bin_filename); printf("Logits saved to %s\n", txt_filename); diff --git a/examples/model-conversion/scripts/embedding/compare-embeddings-logits.sh b/examples/model-conversion/scripts/embedding/compare-embeddings-logits.sh index 1401dcb4..c48af307 100755 --- a/examples/model-conversion/scripts/embedding/compare-embeddings-logits.sh +++ b/examples/model-conversion/scripts/embedding/compare-embeddings-logits.sh @@ -2,8 +2,37 @@ set -e -MODEL_PATH="${1:-"$EMBEDDING_MODEL_PATH"}" -MODEL_NAME="${2:-$(basename "$MODEL_PATH")}" +# Parse command line arguments +MODEL_PATH="" +MODEL_NAME="" +PROMPTS_FILE="" + +# First argument is always model path +if [ $# -gt 0 ] && [[ "$1" != --* ]]; then + MODEL_PATH="$1" + shift +fi + +# Parse remaining arguments +while [[ $# -gt 0 ]]; do + case $1 in + --prompts-file|-pf) + PROMPTS_FILE="$2" + shift 2 + ;; + *) + # If MODEL_NAME not set and this isn't a flag, use as model name + if [ -z "$MODEL_NAME" ] && [[ "$1" != --* ]]; then + MODEL_NAME="$1" + fi + shift + ;; + esac +done + +# Set defaults +MODEL_PATH="${MODEL_PATH:-"$EMBEDDING_MODEL_PATH"}" +MODEL_NAME="${MODEL_NAME:-$(basename "$MODEL_PATH")}" if [ -t 0 ]; then CPP_EMBEDDINGS="data/llamacpp-${MODEL_NAME}-embeddings.bin" @@ -35,8 +64,18 @@ with open('$TEMP_FILE', 'wb') as f: trap "rm -f $TEMP_FILE" EXIT fi -python scripts/utils/semantic_check.py --model-path $MODEL_PATH \ +# Build the semantic_check.py command +SEMANTIC_CMD="python scripts/utils/semantic_check.py --model-path $MODEL_PATH \ --python-embeddings data/pytorch-${MODEL_NAME}-embeddings.bin \ - --cpp-embeddings $CPP_EMBEDDINGS \ - --prompt "Hello world today" + --cpp-embeddings $CPP_EMBEDDINGS" + +# Add prompts file if specified, otherwise use default prompt +if [ -n "$PROMPTS_FILE" ]; then + SEMANTIC_CMD="$SEMANTIC_CMD --prompts-file \"$PROMPTS_FILE\"" +else + SEMANTIC_CMD="$SEMANTIC_CMD --prompt \"Hello world today\"" +fi + +# Execute the command +eval $SEMANTIC_CMD diff --git a/examples/model-conversion/scripts/embedding/run-converted-model.sh b/examples/model-conversion/scripts/embedding/run-converted-model.sh index 24b28106..f3e26766 100755 --- a/examples/model-conversion/scripts/embedding/run-converted-model.sh +++ b/examples/model-conversion/scripts/embedding/run-converted-model.sh @@ -2,8 +2,27 @@ set -e -# First try command line argument, then environment variable, then file -CONVERTED_MODEL="${1:-"$CONVERTED_EMBEDDING_MODEL"}" +# Parse command line arguments +CONVERTED_MODEL="" +PROMPTS_FILE="" + +while [[ $# -gt 0 ]]; do + case $1 in + -p|--prompts-file) + PROMPTS_FILE="$2" + shift 2 + ;; + *) + if [ -z "$CONVERTED_MODEL" ]; then + CONVERTED_MODEL="$1" + fi + shift + ;; + esac +done + +# First try command line argument, then environment variable +CONVERTED_MODEL="${CONVERTED_MODEL:-"$CONVERTED_EMBEDDING_MODEL"}" # Final check if we have a model path if [ -z "$CONVERTED_MODEL" ]; then @@ -13,8 +32,19 @@ if [ -z "$CONVERTED_MODEL" ]; then exit 1 fi +# Read prompt from file or use default +if [ -n "$PROMPTS_FILE" ]; then + if [ ! -f "$PROMPTS_FILE" ]; then + echo "Error: Prompts file '$PROMPTS_FILE' not found" >&2 + exit 1 + fi + PROMPT=$(cat "$PROMPTS_FILE") +else + PROMPT="Hello world today" +fi + echo $CONVERTED_MODEL cmake --build ../../build --target llama-logits -j8 - -../../build/bin/llama-logits -m "$CONVERTED_MODEL" -embd-mode "Hello world today" +# TODO: update logits.cpp to accept a --file/-f option for the prompt +../../build/bin/llama-logits -m "$CONVERTED_MODEL" -embd-mode "$PROMPT" diff --git a/examples/model-conversion/scripts/embedding/run-original-model.py b/examples/model-conversion/scripts/embedding/run-original-model.py index b9db0b89..4a3e1624 100755 --- a/examples/model-conversion/scripts/embedding/run-original-model.py +++ b/examples/model-conversion/scripts/embedding/run-original-model.py @@ -13,14 +13,37 @@ unreleased_model_name = os.getenv('UNRELEASED_MODEL_NAME') parser = argparse.ArgumentParser(description='Process model with specified path') parser.add_argument('--model-path', '-m', help='Path to the model') +parser.add_argument('--prompts-file', '-p', help='Path to file containing prompts (one per line)') args = parser.parse_args() +def read_prompt_from_file(file_path): + try: + with open(file_path, 'r', encoding='utf-8') as f: + return f.read().strip() + except FileNotFoundError: + print(f"Error: Prompts file '{file_path}' not found") + exit(1) + except Exception as e: + print(f"Error reading prompts file: {e}") + exit(1) + model_path = os.environ.get('EMBEDDING_MODEL_PATH', args.model_path) if model_path is None: parser.error("Model path must be specified either via --model-path argument or EMBEDDING_MODEL_PATH environment variable") tokenizer = AutoTokenizer.from_pretrained(model_path) +config = AutoConfig.from_pretrained(model_path) + +# This can be used to override the sliding window size for manual testing. This +# can be useful to verify the sliding window attention mask in the original model +# and compare it with the converted .gguf model. +if hasattr(config, 'sliding_window'): + original_sliding_window = config.sliding_window + #original_sliding_window = 6 + print(f"Modified sliding window: {original_sliding_window} -> {config.sliding_window}") + +print(f"Using unreleased model: {unreleased_model_name}") if unreleased_model_name: model_name_lower = unreleased_model_name.lower() unreleased_module_path = f"transformers.models.{model_name_lower}.modular_{model_name_lower}" @@ -29,19 +52,28 @@ if unreleased_model_name: try: model_class = getattr(importlib.import_module(unreleased_module_path), class_name) - model = model_class.from_pretrained(model_path) # Note: from_pretrained, not fromPretrained + model = model_class.from_pretrained(model_path, config=config) except (ImportError, AttributeError) as e: print(f"Failed to import or load model: {e}") exit(1) else: - model = AutoModel.from_pretrained(model_path) + model = AutoModel.from_pretrained(model_path, config=config) print(f"Model class: {type(model)}") -#print(f"Model file: {type(model).__module__}") -config = AutoConfig.from_pretrained(model_path) +print(f"Model file: {type(model).__module__}") + +# Verify the model is using the correct sliding window +if hasattr(model.config, 'sliding_window'): + print(f"Model's sliding_window: {model.config.sliding_window}") +else: + print("Model config does not have sliding_window attribute") model_name = os.path.basename(model_path) -texts = [ "Hello world today" ] +if args.prompts_file: + prompt_text = read_prompt_from_file(args.prompts_file) + texts = [prompt_text] +else: + texts = ["Hello world today"] encoded = tokenizer( texts, diff --git a/examples/model-conversion/scripts/utils/inspect-org-model.py b/examples/model-conversion/scripts/utils/inspect-org-model.py index ea14947f..bc6f45a5 100755 --- a/examples/model-conversion/scripts/utils/inspect-org-model.py +++ b/examples/model-conversion/scripts/utils/inspect-org-model.py @@ -40,7 +40,7 @@ if os.path.exists(index_path): file_path = os.path.join(model_path, file_name) print(f"\n--- From {file_name} ---") - with safe_open(file_path, framework="pt") as f: # type: ignore + with safe_open(file_path, framework="pt") as f: for tensor_name in sorted(tensor_names): tensor = f.get_tensor(tensor_name) print(f"- {tensor_name} : shape = {tensor.shape}, dtype = {tensor.dtype}") @@ -49,7 +49,7 @@ elif os.path.exists(single_file_path): # Single file model (original behavior) print("Single-file model detected") - with safe_open(single_file_path, framework="pt") as f: # type: ignore + with safe_open(single_file_path, framework="pt") as f: keys = f.keys() print("Tensors in model:") for key in sorted(keys): diff --git a/examples/model-conversion/scripts/utils/semantic_check.py b/examples/model-conversion/scripts/utils/semantic_check.py index d2110480..7fd417bc 100644 --- a/examples/model-conversion/scripts/utils/semantic_check.py +++ b/examples/model-conversion/scripts/utils/semantic_check.py @@ -101,6 +101,17 @@ def test_single_prompt_similarity(python_emb, cpp_emb, tokens, prompt): 'rms_diff': np.sqrt(np.mean(diff_matrix**2)) } +def read_prompt_from_file(file_path): + try: + with open(file_path, 'r', encoding='utf-8') as f: + return f.read().strip() + except FileNotFoundError: + print(f"Error: Prompts file '{file_path}' not found") + exit(1) + except Exception as e: + print(f"Error reading prompts file: {e}") + exit(1) + def main(): parser = argparse.ArgumentParser(description='Test semantic similarity between Python and llama.cpp embeddings') parser.add_argument('--model-path', '-m', required=True, help='Path to the original Python model') @@ -108,14 +119,20 @@ def main(): parser.add_argument('--cpp-embeddings', '-ce', help='Path to llama.cpp embeddings "logits" binary file') parser.add_argument('--causal', '-c', default=False, help='if the model is causal (default: false)', action='store_true') parser.add_argument('--prompt', '-p', default='Hello world today', help='Test prompt') + parser.add_argument('--prompts-file', '-pf', help='Path to file containing prompts') args = parser.parse_args() + if args.prompts_file: + prompt = read_prompt_from_file(args.prompts_file) + else: + prompt = args.prompt + print("Semantic Similarity Test Between Python and llama.cpp Embedding Models") print("=" * 70) # Single prompt detailed comparison - print(f"\nTesting with prompt: '{args.prompt}'") + print(f"\nTesting with prompt: '{prompt}'") # Load the python model to get configuration information and also to load the tokenizer. print("Loading model and tokenizer using AutoTokenizer:", args.model_path) @@ -144,7 +161,7 @@ def main(): else: model = AutoModel.from_pretrained(args.model_path) - encoded = tokenizer(args.prompt, return_tensors="pt") + encoded = tokenizer(prompt, return_tensors="pt") tokens = tokenizer.convert_ids_to_tokens(encoded['input_ids'][0]) n_tokens = len(tokens) print(f"n_tokens: {n_tokens}"); @@ -155,7 +172,7 @@ def main(): python_embeddings = load_embeddings_from_file(args.python_embeddings, n_tokens, model.config.hidden_size) # Run comparison - results = test_single_prompt_similarity(python_embeddings, llamacpp_embeddings, tokens, args.prompt) + results = test_single_prompt_similarity(python_embeddings, llamacpp_embeddings, tokens, prompt) # Summary print(f"\n=== SUMMARY ===")