embeddings : fix batch sizes (#13076)

author Georgi Gerganov <redacted>

Thu, 24 Apr 2025 19:29:22 +0000 (22:29 +0300)

committer GitHub <redacted>

Thu, 24 Apr 2025 19:29:22 +0000 (22:29 +0300)
author Georgi Gerganov <redacted>
Thu, 24 Apr 2025 19:29:22 +0000 (22:29 +0300)
committer GitHub <redacted>
Thu, 24 Apr 2025 19:29:22 +0000 (22:29 +0300)
diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp

index 6f08904159fd50a1045eadb78f9048624119096a..06fce236e2b85aa3fe74cbf3442e943ef5d5a5d8 100644 (file)
--- a/examples/embedding/embedding.cpp
+++ b/examples/embedding/embedding.cpp
@@ -89,6 +89,13 @@ int main(int argc, char ** argv) {
     common_init();
 
     params.embedding = true;
+
+    // utilize the full context
+    if (params.n_batch < params.n_ctx) {
+        LOG_WRN("%s: setting batch size to %d\n", __func__, params.n_ctx);
+        params.n_batch = params.n_ctx;
+    }
+
     // For non-causal models, batch size must be equal to ubatch size
     params.n_ubatch = params.n_batch;
 
@@ -134,7 +141,6 @@ int main(int argc, char ** argv) {
 
     // max batch size
     const uint64_t n_batch = params.n_batch;
-    GGML_ASSERT(params.n_batch >= params.n_ctx);
 
     // tokenize the prompts and trim
     std::vector<std::vector<int32_t>> inputs;
author	Georgi Gerganov <redacted>
	Thu, 24 Apr 2025 19:29:22 +0000 (22:29 +0300)
committer	GitHub <redacted>
	Thu, 24 Apr 2025 19:29:22 +0000 (22:29 +0300)