embedding-run-original-model:
$(call validate_embedding_model_path,embedding-run-original-model)
- @EMBEDDING_MODEL_PATH="$(EMBEDDING_MODEL_PATH)" ./scripts/embedding/run-original-model.py
+ @EMBEDDING_MODEL_PATH="$(EMBEDDING_MODEL_PATH)" \
+ ./scripts/embedding/run-original-model.py \
+ $(if $(PROMPTS_FILE),--prompts-file "$(PROMPTS_FILE)")
embedding-run-converted-model:
- @CONVERTED_EMBEDDING_MODEL="$(CONVERTED_EMBEDDING_MODEL)" ./scripts/embedding/run-converted-model.sh ${CONVERTED_EMBEDDING_MODEL}
+ @./scripts/embedding/run-converted-model.sh $(CONVERTED_EMBEDDING_MODEL) \
+ $(if $(PROMPTS_FILE),--prompts-file "$(PROMPTS_FILE)")
embedding-verify-logits: embedding-run-original-model embedding-run-converted-model
- @./scripts/embedding/compare-embeddings-logits.sh
+ @./scripts/embedding/compare-embeddings-logits.sh \
+ $(if $(PROMPTS_FILE),--prompts-file "$(PROMPTS_FILE)")
embedding-inspect-original-model:
$(call validate_embedding_model_path,embedding-inspect-original-model)
$(call quantize_model,$(CONVERTED_EMBEDDING_MODEL),QUANTIZED_EMBEDDING_MODEL)
embedding-run-quantized-model:
- @./scripts/embedding/run-converted-model.sh ${QUANTIZED_EMBEDDING_MODEL}
+ @./scripts/embedding/run-converted-model.sh $(QUANTIZED_EMBEDDING_MODEL) \
+ $(if $(PROMPTS_FILE),--prompts-file "$(PROMPTS_FILE)")
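+
+# The embedding run/verify targets above accept an optional PROMPTS_FILE variable.
+# Example (assumes the usual model path variables are already set; the prompts
+# file path is only an illustration):
+#   make embedding-verify-logits PROMPTS_FILE=prompts.txt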
###
### Perplexity targets/recipes
logits = llama_get_embeddings(ctx);
n_logits = llama_model_n_embd(model) * batch.n_tokens;
type = "-embeddings";
+
+ const int n_embd = llama_model_n_embd(model);
+ const int n_embd_count = batch.n_tokens;
+
+ printf("Embedding dimension: %d\n", n_embd);
+ printf("\n");
+
+ // Print embeddings in the specified format
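+    // Output format: "embedding <j>: <first 3 values> ... <last 3 values>"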
+ for (int j = 0; j < n_embd_count; j++) {
+ printf("embedding %d: ", j);
+
+ // Print first 3 values
+ for (int i = 0; i < 3 && i < n_embd; i++) {
+ printf("%9.6f ", logits[j * n_embd + i]);
+ }
+
+ printf(" ... ");
+
+ // Print last 3 values
+ for (int i = n_embd - 3; i < n_embd; i++) {
+ if (i >= 0) {
+ printf("%9.6f ", logits[j * n_embd + i]);
+ }
+ }
+
+ printf("\n");
+ }
+ printf("\n");
+
printf("Embeddings size: %d\n", n_logits);
} else {
logits = llama_get_logits_ith(ctx, batch.n_tokens - 1);
return 1;
}
for (int i = 0; i < n_logits; i++) {
- fprintf(f, "%d: %.6f\n", i, logits[i]); // Added index and changed format
+ fprintf(f, "%d: %.6f\n", i, logits[i]);
}
fclose(f);
- // Print first and last 10 logits for quick verification
- printf("First 10 logits: ");
- for (int i = 0; i < 10 && i < n_logits; i++) {
- printf("%.6f ", logits[i]);
- }
- printf("\n");
+ if (!embedding_mode) {
+ printf("First 10 logits: ");
+ for (int i = 0; i < 10 && i < n_logits; i++) {
+ printf("%.6f ", logits[i]);
+ }
+ printf("\n");
- printf("Last 10 logits: ");
- for (int i = n_logits - 10; i < n_logits; i++) {
- if (i >= 0) printf("%.6f ", logits[i]);
+ printf("Last 10 logits: ");
+ for (int i = n_logits - 10; i < n_logits; i++) {
+ if (i >= 0) printf("%.6f ", logits[i]);
+ }
+ printf("\n\n");
}
- printf("\n\n");
printf("Logits saved to %s\n", bin_filename);
printf("Logits saved to %s\n", txt_filename);
set -e
-MODEL_PATH="${1:-"$EMBEDDING_MODEL_PATH"}"
-MODEL_NAME="${2:-$(basename "$MODEL_PATH")}"
+# Parse command line arguments
+MODEL_PATH=""
+MODEL_NAME=""
+PROMPTS_FILE=""
+
+# First argument is always model path
+if [ $# -gt 0 ] && [[ "$1" != -* ]]; then
+ MODEL_PATH="$1"
+ shift
+fi
+
+# Parse remaining arguments
+while [[ $# -gt 0 ]]; do
+ case $1 in
+ --prompts-file|-pf)
+ PROMPTS_FILE="$2"
+ shift 2
+ ;;
+ *)
+ # If MODEL_NAME not set and this isn't a flag, use as model name
+ if [ -z "$MODEL_NAME" ] && [[ "$1" != --* ]]; then
+ MODEL_NAME="$1"
+ fi
+ shift
+ ;;
+ esac
+done
+
+# Set defaults
+MODEL_PATH="${MODEL_PATH:-"$EMBEDDING_MODEL_PATH"}"
+MODEL_NAME="${MODEL_NAME:-$(basename "$MODEL_PATH")}"
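+
+# Usage: [MODEL_PATH] [MODEL_NAME] [--prompts-file|-pf FILE]
+# MODEL_PATH falls back to $EMBEDDING_MODEL_PATH and MODEL_NAME to its basename;
+# a given prompts file is forwarded to semantic_check.py below.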
if [ -t 0 ]; then
CPP_EMBEDDINGS="data/llamacpp-${MODEL_NAME}-embeddings.bin"
trap "rm -f $TEMP_FILE" EXIT
fi
-python scripts/utils/semantic_check.py --model-path $MODEL_PATH \
+# Build the semantic_check.py command as an argument array (avoids eval quoting pitfalls)
+SEMANTIC_CMD=(python scripts/utils/semantic_check.py --model-path "$MODEL_PATH" \
--python-embeddings data/pytorch-${MODEL_NAME}-embeddings.bin \
- --cpp-embeddings $CPP_EMBEDDINGS \
- --prompt "Hello world today"
+    --cpp-embeddings "$CPP_EMBEDDINGS")
+
+# Add the prompts file if specified, otherwise fall back to the default prompt
+if [ -n "$PROMPTS_FILE" ]; then
+    SEMANTIC_CMD+=(--prompts-file "$PROMPTS_FILE")
+else
+    SEMANTIC_CMD+=(--prompt "Hello world today")
+fi
+
+# Execute the command
+"${SEMANTIC_CMD[@]}"
set -e
-# First try command line argument, then environment variable, then file
-CONVERTED_MODEL="${1:-"$CONVERTED_EMBEDDING_MODEL"}"
+# Parse command line arguments
+CONVERTED_MODEL=""
+PROMPTS_FILE=""
+
+while [[ $# -gt 0 ]]; do
+ case $1 in
+ -p|--prompts-file)
+ PROMPTS_FILE="$2"
+ shift 2
+ ;;
+ *)
+ if [ -z "$CONVERTED_MODEL" ]; then
+ CONVERTED_MODEL="$1"
+ fi
+ shift
+ ;;
+ esac
+done
+
+# First try command line argument, then environment variable
+CONVERTED_MODEL="${CONVERTED_MODEL:-"$CONVERTED_EMBEDDING_MODEL"}"
# Final check if we have a model path
if [ -z "$CONVERTED_MODEL" ]; then
exit 1
fi
+# Read the prompt from a file (the whole file is used as a single prompt) or fall back to the default
+if [ -n "$PROMPTS_FILE" ]; then
+ if [ ! -f "$PROMPTS_FILE" ]; then
+ echo "Error: Prompts file '$PROMPTS_FILE' not found" >&2
+ exit 1
+ fi
+ PROMPT=$(cat "$PROMPTS_FILE")
+else
+ PROMPT="Hello world today"
+fi
+
echo $CONVERTED_MODEL
cmake --build ../../build --target llama-logits -j8
-
-../../build/bin/llama-logits -m "$CONVERTED_MODEL" -embd-mode "Hello world today"
+# TODO: update logits.cpp to accept a --file/-f option for the prompt
+../../build/bin/llama-logits -m "$CONVERTED_MODEL" -embd-mode "$PROMPT"
parser = argparse.ArgumentParser(description='Process model with specified path')
parser.add_argument('--model-path', '-m', help='Path to the model')
+parser.add_argument('--prompts-file', '-p', help='Path to a file whose contents are used as the prompt')
args = parser.parse_args()
+def read_prompt_from_file(file_path):
+ try:
+ with open(file_path, 'r', encoding='utf-8') as f:
+ return f.read().strip()
+ except FileNotFoundError:
+ print(f"Error: Prompts file '{file_path}' not found")
+ exit(1)
+ except Exception as e:
+ print(f"Error reading prompts file: {e}")
+ exit(1)
+
model_path = os.environ.get('EMBEDDING_MODEL_PATH', args.model_path)
if model_path is None:
parser.error("Model path must be specified either via --model-path argument or EMBEDDING_MODEL_PATH environment variable")
tokenizer = AutoTokenizer.from_pretrained(model_path)
+config = AutoConfig.from_pretrained(model_path)
+
+# The sliding window size can be overridden here for manual testing. This is
+# useful for verifying the sliding window attention mask in the original model
+# and comparing it with the converted .gguf model.
+if hasattr(config, 'sliding_window'):
+    original_sliding_window = config.sliding_window
+    #config.sliding_window = 6  # uncomment to override the sliding window size
+    print(f"Sliding window: {original_sliding_window} -> {config.sliding_window}")
+
+print(f"Using unreleased model: {unreleased_model_name}")
if unreleased_model_name:
model_name_lower = unreleased_model_name.lower()
unreleased_module_path = f"transformers.models.{model_name_lower}.modular_{model_name_lower}"
try:
model_class = getattr(importlib.import_module(unreleased_module_path), class_name)
- model = model_class.from_pretrained(model_path) # Note: from_pretrained, not fromPretrained
+ model = model_class.from_pretrained(model_path, config=config)
except (ImportError, AttributeError) as e:
print(f"Failed to import or load model: {e}")
exit(1)
else:
- model = AutoModel.from_pretrained(model_path)
+ model = AutoModel.from_pretrained(model_path, config=config)
print(f"Model class: {type(model)}")
-#print(f"Model file: {type(model).__module__}")
-config = AutoConfig.from_pretrained(model_path)
+print(f"Model file: {type(model).__module__}")
+
+# Verify the model is using the correct sliding window
+if hasattr(model.config, 'sliding_window'):
+ print(f"Model's sliding_window: {model.config.sliding_window}")
+else:
+ print("Model config does not have sliding_window attribute")
model_name = os.path.basename(model_path)
-texts = [ "Hello world today" ]
+if args.prompts_file:
+ prompt_text = read_prompt_from_file(args.prompts_file)
+ texts = [prompt_text]
+else:
+ texts = ["Hello world today"]
encoded = tokenizer(
texts,
file_path = os.path.join(model_path, file_name)
print(f"\n--- From {file_name} ---")
- with safe_open(file_path, framework="pt") as f: # type: ignore
+ with safe_open(file_path, framework="pt") as f:
for tensor_name in sorted(tensor_names):
tensor = f.get_tensor(tensor_name)
print(f"- {tensor_name} : shape = {tensor.shape}, dtype = {tensor.dtype}")
# Single file model (original behavior)
print("Single-file model detected")
- with safe_open(single_file_path, framework="pt") as f: # type: ignore
+ with safe_open(single_file_path, framework="pt") as f:
keys = f.keys()
print("Tensors in model:")
for key in sorted(keys):
'rms_diff': np.sqrt(np.mean(diff_matrix**2))
}
+def read_prompt_from_file(file_path):
+ try:
+ with open(file_path, 'r', encoding='utf-8') as f:
+ return f.read().strip()
+ except FileNotFoundError:
+ print(f"Error: Prompts file '{file_path}' not found")
+ exit(1)
+ except Exception as e:
+ print(f"Error reading prompts file: {e}")
+ exit(1)
+
def main():
parser = argparse.ArgumentParser(description='Test semantic similarity between Python and llama.cpp embeddings')
parser.add_argument('--model-path', '-m', required=True, help='Path to the original Python model')
parser.add_argument('--cpp-embeddings', '-ce', help='Path to llama.cpp embeddings "logits" binary file')
parser.add_argument('--causal', '-c', default=False, help='if the model is causal (default: false)', action='store_true')
parser.add_argument('--prompt', '-p', default='Hello world today', help='Test prompt')
+    parser.add_argument('--prompts-file', '-pf', help='Path to a file whose contents are used as the prompt')
args = parser.parse_args()
+ if args.prompts_file:
+ prompt = read_prompt_from_file(args.prompts_file)
+ else:
+ prompt = args.prompt
+
print("Semantic Similarity Test Between Python and llama.cpp Embedding Models")
print("=" * 70)
# Single prompt detailed comparison
- print(f"\nTesting with prompt: '{args.prompt}'")
+ print(f"\nTesting with prompt: '{prompt}'")
# Load the python model to get configuration information and also to load the tokenizer.
print("Loading model and tokenizer using AutoTokenizer:", args.model_path)
else:
model = AutoModel.from_pretrained(args.model_path)
- encoded = tokenizer(args.prompt, return_tensors="pt")
+ encoded = tokenizer(prompt, return_tensors="pt")
tokens = tokenizer.convert_ids_to_tokens(encoded['input_ids'][0])
n_tokens = len(tokens)
print(f"n_tokens: {n_tokens}");
python_embeddings = load_embeddings_from_file(args.python_embeddings, n_tokens, model.config.hidden_size)
# Run comparison
- results = test_single_prompt_similarity(python_embeddings, llamacpp_embeddings, tokens, args.prompt)
+ results = test_single_prompt_similarity(python_embeddings, llamacpp_embeddings, tokens, prompt)
# Summary
print(f"\n=== SUMMARY ===")