hf-create-model:
@./scripts/utils/hf-create-model.py -m "${MODEL_NAME}" -ns "${NAMESPACE}" -b "${ORIGINAL_BASE_MODEL}"
+hf-create-model-dry-run:
+ @./scripts/utils/hf-create-model.py -m "${MODEL_NAME}" -ns "${NAMESPACE}" -b "${ORIGINAL_BASE_MODEL}" -d
+
+hf-create-model-embedding:
+ @./scripts/utils/hf-create-model.py -m "${MODEL_NAME}" -ns "${NAMESPACE}" -b "${ORIGINAL_BASE_MODEL}" -e
+
+hf-create-model-embedding-dry-run:
+ @./scripts/utils/hf-create-model.py -m "${MODEL_NAME}" -ns "${NAMESPACE}" -b "${ORIGINAL_BASE_MODEL}" -e -d
+
hf-create-model-private:
@./scripts/utils/hf-create-model.py -m "${MODEL_NAME}" -ns "${NAMESPACE}" -b "${ORIGINAL_BASE_MODEL}" -p
This will create a new model repository on Hugging Face with the specified
model name.
```console
-(venv) $ make hf-create-model MODEL_NAME='TestModel' NAMESPACE="danbev"
+(venv) $ make hf-create-model MODEL_NAME='TestModel' NAMESPACE="danbev" ORIGINAL_BASE_MODEL="some-base-model"
Repository ID: danbev/TestModel-GGUF
Repository created: https://huggingface.co/danbev/TestModel-GGUF
```
Note that we append a `-GGUF` suffix to the model name to ensure a consistent
naming convention for GGUF models.
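+The `*-dry-run` variants print the repository ID and the rendered model card
+without creating anything on Hugging Face (output abridged):
+```console
+(venv) $ make hf-create-model-dry-run MODEL_NAME='TestModel' NAMESPACE="danbev" ORIGINAL_BASE_MODEL="some-base-model"
+Repository ID: danbev/TestModel-GGUF
+Template path: scripts/causal/modelcard.template
+
+Template Content:
+...
+```
+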
+An embedding model can be created using the following command:
+```console
+(venv) $ make hf-create-model-embedding MODEL_NAME='TestEmbeddingModel' NAMESPACE="danbev" ORIGINAL_BASE_MODEL="some-base-model"
+```
+The only difference is the model card: for an embedding model it documents a
+different `llama-server` invocation and how to call the embedding endpoint.
+
### Upload a GGUF model to model repository
The following target uploads a model to an existing Hugging Face model repository.
--- /dev/null
+++ b/scripts/causal/modelcard.template
+---
+base_model:
+- {base_model}
+---
+# {model_name} GGUF
+
+Recommended way to run this model:
+
+```sh
+llama-server -hf {namespace}/{model_name}-GGUF -c 0 -fa
+```
+
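+Here `-c 0` sets the context size from the model (its full training context
+length) and `-fa` enables Flash Attention.
+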
+Then, access http://localhost:8080
--- /dev/null
+++ b/scripts/embedding/modelcard.template
+---
+base_model:
+- {base_model}
+---
+# {model_name} GGUF
+
+Recommended way to run this model:
+
+```sh
+llama-server -hf {namespace}/{model_name}-GGUF
+```
+
+Then the endpoint can be accessed at http://localhost:8080/embedding, for
+example using `curl`:
+```console
+curl --request POST \
+ --url http://localhost:8080/embedding \
+ --header "Content-Type: application/json" \
+ --data '{{"input": "Hello embeddings"}}' \
+ --silent
+```
+
+Alternatively, the `llama-embedding` command line tool can be used:
+```sh
+llama-embedding -hf {namespace}/{model_name}-GGUF --verbose-prompt -p "Hello embeddings"
+```
+
+#### embd_normalize
+When a model uses pooling, or the pooling method is specified using `--pooling`,
+the normalization can be controlled by the `embd_normalize` parameter.
+
+The default value is `2`, which means the embeddings are normalized using the
+Euclidean norm (L2). The other options are:
+* -1 No normalization
+* 0 Max absolute
+* 1 Taxicab
+* 2 Euclidean/L2
+* \>2 P-Norm
+
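+To make these options concrete, here is a minimal Python sketch of the
+normalization variants (an illustration of the math only, not the actual
+llama.cpp implementation):
+```python
+def normalize(vec, embd_normalize=2):
+    if embd_normalize < 0:     # -1: no normalization
+        return vec
+    if embd_normalize == 0:    # 0: max absolute
+        norm = max(abs(x) for x in vec)
+    elif embd_normalize == 1:  # 1: taxicab (L1)
+        norm = sum(abs(x) for x in vec)
+    elif embd_normalize == 2:  # 2: Euclidean/L2 (the default)
+        norm = sum(x * x for x in vec) ** 0.5
+    else:                      # >2: p-norm with p = embd_normalize
+        p = embd_normalize
+        norm = sum(abs(x) ** p for x in vec) ** (1.0 / p)
+    return [x / norm for x in vec] if norm else vec
+```
+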
+This can be passed in the request body to `llama-server`, for example:
+```sh
+ --data '{{"input": "Hello embeddings", "embd_normalize": -1}}' \
+```
+
+And for `llama-embedding`, by passing `--embd-normalize <value>`, for example:
+```sh
+llama-embedding -hf {namespace}/{model_name}-GGUF --embd-normalize -1 -p "Hello embeddings"
+```
--- a/scripts/readme.md.template
+++ /dev/null
----
-base_model:
-- {base_model}
----
-# {model_name} GGUF
-
-Recommended way to run this model:
-
-```sh
-llama-server -hf {namespace}/{model_name}-GGUF -c 0 -fa
-```
-
-Then, access http://localhost:8080
parser.add_argument('--org-base-model', '-b', help='Original Base model name', default="")
parser.add_argument('--no-card', action='store_true', help='Skip creating model card')
parser.add_argument('--private', '-p', action='store_true', help='Create private model')
+parser.add_argument('--embedding', '-e', action='store_true', help='Use embedding model card template')
+parser.add_argument('--dry-run', '-d', action='store_true', help='Print repository info and template without creating repository')
args = parser.parse_args()
repo_id = f"{args.namespace}/{args.model_name}-GGUF"
print("Repository ID: ", repo_id)
-repo_url = api.create_repo(
- repo_id=repo_id,
- repo_type="model",
- private=args.private,
- exist_ok=False
-)
+repo_url = None
+if not args.dry_run:
+ repo_url = api.create_repo(
+ repo_id=repo_id,
+ repo_type="model",
+ private=args.private,
+ exist_ok=False
+ )
if not args.no_card:
- template_path = "scripts/readme.md.template"
+ if args.embedding:
+ template_path = "scripts/embedding/modelcard.template"
+ else:
+ template_path = "scripts/causal/modelcard.template"
+
+ print("Template path: ", template_path)
+
    model_card_content = load_template_and_substitute(
        template_path,
        model_name=args.model_name,
        namespace=args.namespace,  # templates reference {namespace}
        base_model=args.org_base_model,
    )
- if model_card_content:
- api.upload_file(
- path_or_fileobj=model_card_content.encode('utf-8'),
- path_in_repo="README.md",
- repo_id=repo_id
- )
- print("Model card created successfully.")
+ if args.dry_run:
+ print("\nTemplate Content:\n")
+ print(model_card_content)
else:
- print("Failed to create model card.")
+ if model_card_content:
+ api.upload_file(
+ path_or_fileobj=model_card_content.encode('utf-8'),
+ path_in_repo="README.md",
+ repo_id=repo_id
+ )
+ print("Model card created successfully.")
+ else:
+ print("Failed to create model card.")
-print(f"Repository created: {repo_url}")
+if not args.dry_run and repo_url:
+ print(f"Repository created: {repo_url}")