embedding : print cosine similarity (#899)

author Georgi Gerganov <redacted>

Thu, 14 Mar 2024 08:12:29 +0000 (10:12 +0200)

committer Georgi Gerganov <redacted>

Thu, 14 Mar 2024 08:12:29 +0000 (10:12 +0200)
author Georgi Gerganov <redacted>
Thu, 14 Mar 2024 08:12:29 +0000 (10:12 +0200)
committer Georgi Gerganov <redacted>
Thu, 14 Mar 2024 08:12:29 +0000 (10:12 +0200)
diff --git a/common/common.cpp b/common/common.cpp

index 73b1b61ba1b74f28ecb7ed2d5b244baa7243db9c..58fbd05aa35165c0388448bc29595951e3d99522 100644 (file)
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -1877,3 +1877,16 @@ void llama_embd_normalize(const float * inp, float * out, int n) {
      }
  }
  
// Cosine similarity between two embedding vectors of length n.
//
//   embd1, embd2 : pointers to n contiguous floats each
//   n            : number of components to compare
//
// Returns dot(embd1, embd2) / (|embd1| * |embd2|).
//
// Degenerate inputs (which would otherwise produce 0/0 -> NaN or x/0 -> inf):
//   - both vectors have zero norm (this includes n <= 0): returns 1.0f,
//     i.e. two zero vectors are treated as identical
//   - exactly one vector has zero norm: returns 0.0f
float llama_embd_similarity_cos(const float * embd1, const float * embd2, int n){
    double sum  = 0.0; // dot product
    double sum1 = 0.0; // squared norm of embd1
    double sum2 = 0.0; // squared norm of embd2

    for (int i = 0; i < n; i++) {
        // widen before multiplying so the products are accumulated without
        // an intermediate rounding to float
        const double a = embd1[i];
        const double b = embd2[i];

        sum  += a * b;
        sum1 += a * a;
        sum2 += b * b;
    }

    // a zero norm makes the denominator zero - handle it explicitly instead
    // of propagating NaN/inf to the caller
    if (sum1 == 0.0 || sum2 == 0.0) {
        if (sum1 == 0.0 && sum2 == 0.0) {
            return 1.0f;
        }
        return 0.0f;
    }

    return (float) (sum / (sqrt(sum1) * sqrt(sum2)));
}
diff --git a/common/common.h b/common/common.h

index 0f178b9eb1de37bb4ff6a104839a6767c43b9d39..d250eef8b2b6b4e5b2f20cfc91c66798b58bea83 100644 (file)
--- a/common/common.h
+++ b/common/common.h
@@ -268,3 +268,4 @@ void dump_kv_cache_view_seqs(const llama_kv_cache_view & view, int row_size = 40
  
  void llama_embd_normalize(const float * inp, float * out, int n);
  
+float llama_embd_similarity_cos(const float * embd1, const float * embd2, int n);
diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp

index 49302a199977ed35880de6e4fd97b8db0cfdffa8..f390c406104467ae4ee9b5b9cf83dbcfb16563aa 100644 (file)
--- a/examples/embedding/embedding.cpp
+++ b/examples/embedding/embedding.cpp
@@ -168,14 +168,25 @@ int main(int argc, char ** argv) {
      batch_decode(ctx, batch, out, s, n_embd);
  
      // print first 3 embeddings
+    fprintf(stdout, "\n");
      for (int j = 0; j < std::min(3, n_prompts); j++) {
-        fprintf(stderr, "embedding %d: ", j);
-        for (int i = 0; i < n_embd; i++) {
-            fprintf(stderr, "%f ", emb[j * n_embd + i]);
+        fprintf(stdout, "embedding %d: ", j);
+        for (int i = 0; i < std::min(16, n_embd); i++) {
+            fprintf(stdout, "%f ", emb[j * n_embd + i]);
          }
-        fprintf(stderr, "\n\n");
+        fprintf(stdout, "\n");
+    }
+
+    // print cosine similarity matrix
+    fprintf(stdout, "\n");
+    printf("cosine similarity matrix:\n\n");
+    for (int i = 0; i < n_prompts; i++) {
+        for (int j = 0; j < n_prompts; j++) {
+            float sim = llama_embd_similarity_cos(emb + i * n_embd, emb + j * n_embd, n_embd);
+            fprintf(stdout, "%6.2f ", sim);
+        }
+        fprintf(stdout, "\n");
      }
-    fprintf(stderr, "\n");
  
      // clean up
      llama_print_timings(ctx);
diff --git a/examples/gritlm/gritlm.cpp b/examples/gritlm/gritlm.cpp

index 3d4b085d69b6feccd2f586debcc91fb80a593c4a..52fd719b38ee56014c8d4a63842bb6865f66c8d3 100644 (file)
--- a/examples/gritlm/gritlm.cpp
+++ b/examples/gritlm/gritlm.cpp
@@ -6,22 +6,6 @@
  
  // #define GRIT_DEBUG
  
-static float dot_product(const std::vector<float> & v1, const std::vector<float> & v2) {
-    float dot = 0.0f;
-    for (uint64_t i = 0; i < v1.size(); ++i) {
-        dot += v1[i] * v2[i];
-    }
-    return dot;
-}
-
-static float norm(const std::vector<float> & v) {
-    return std::sqrt(dot_product(v, v));
-}
-
-static float cosine_similarity(const std::vector<float> & v1, const std::vector<float> & v2) {
-    return dot_product(v1, v2) / (norm(v1) * norm(v2));
-}
-
  static std::vector<std::vector<float>> encode(llama_context * ctx, const std::vector<std::string> & sentences, const std::string & instruction) {
      std::vector<std::vector<float>> result;
  
@@ -203,10 +187,12 @@ int main(int argc, char * argv[]) {
          const std::vector<std::vector<float>> d_rep = encode(ctx, documents, gritlm_instruction(""));
          const std::vector<std::vector<float>> q_rep = encode(ctx, queries,   gritlm_instruction(instruction));
  
-        const float cosine_sim_q0_d0 = cosine_similarity(q_rep[0], d_rep[0]);
-        const float cosine_sim_q0_d1 = cosine_similarity(q_rep[0], d_rep[1]);
-        const float cosine_sim_q1_d0 = cosine_similarity(q_rep[1], d_rep[0]);
-        const float cosine_sim_q1_d1 = cosine_similarity(q_rep[1], d_rep[1]);
+        const int n_embd = llama_n_embd(mdl);
+
+        const float cosine_sim_q0_d0 = llama_embd_similarity_cos(q_rep[0].data(), d_rep[0].data(), n_embd);
+        const float cosine_sim_q0_d1 = llama_embd_similarity_cos(q_rep[0].data(), d_rep[1].data(), n_embd);
+        const float cosine_sim_q1_d0 = llama_embd_similarity_cos(q_rep[1].data(), d_rep[0].data(), n_embd);
+        const float cosine_sim_q1_d1 = llama_embd_similarity_cos(q_rep[1].data(), d_rep[1].data(), n_embd);
  
          std::printf("Cosine similarity between \"%.50s\" and \"%.50s\" is: %.3f\n", queries[0].c_str(), documents[0].c_str(), cosine_sim_q0_d0);
          std::printf("Cosine similarity between \"%.50s\" and \"%.50s\" is: %.3f\n", queries[0].c_str(), documents[1].c_str(), cosine_sim_q0_d1);
author	Georgi Gerganov <redacted>
	Thu, 14 Mar 2024 08:12:29 +0000 (10:12 +0200)
committer	Georgi Gerganov <redacted>
	Thu, 14 Mar 2024 08:12:29 +0000 (10:12 +0200)
common/common.cpp		patch \| blob \| history
common/common.h		patch \| blob \| history
examples/embedding/embedding.cpp		patch \| blob \| history
examples/gritlm/gritlm.cpp		patch \| blob \| history