}
}
/// Cosine similarity between two embedding vectors of length n.
///
/// @param embd1  first vector; elements [0, n) are read
/// @param embd2  second vector; elements [0, n) are read
/// @param n      number of elements read from each vector
/// @return dot(embd1, embd2) / (|embd1| * |embd2|), narrowed to float.
///         If a norm is zero (this includes n <= 0) the quotient is undefined:
///         1.0f is returned when both norms are zero, 0.0f when only one is.
float llama_embd_similarity_cos(const float * embd1, const float * embd2, int n){
    double sum  = 0.0; // dot product of the two vectors
    double sum1 = 0.0; // squared norm of embd1
    double sum2 = 0.0; // squared norm of embd2

    for (int i = 0; i < n; i++) {
        // widen before multiplying: float * float is evaluated in float, which can
        // overflow to inf (and loses precision) before it reaches the double sums
        const double a = embd1[i];
        const double b = embd2[i];

        sum  += a * b;
        sum1 += a * a;
        sum2 += b * b;
    }

    // a zero norm would make the division below produce NaN
    if (sum1 == 0.0 || sum2 == 0.0) {
        return (sum1 == 0.0 && sum2 == 0.0) ? 1.0f : 0.0f;
    }

    return static_cast<float>(sum / (sqrt(sum1) * sqrt(sum2)));
}
batch_decode(ctx, batch, out, s, n_embd);
// print first 3 embeddings
+ fprintf(stdout, "\n");
for (int j = 0; j < std::min(3, n_prompts); j++) {
- fprintf(stderr, "embedding %d: ", j);
- for (int i = 0; i < n_embd; i++) {
- fprintf(stderr, "%f ", emb[j * n_embd + i]);
+ fprintf(stdout, "embedding %d: ", j);
+ for (int i = 0; i < std::min(16, n_embd); i++) {
+ fprintf(stdout, "%f ", emb[j * n_embd + i]);
}
- fprintf(stderr, "\n\n");
+ fprintf(stdout, "\n");
+ }
+
+ // print cosine similarity matrix
+ fprintf(stdout, "\n");
+ printf("cosine similarity matrix:\n\n");
+ for (int i = 0; i < n_prompts; i++) {
+ for (int j = 0; j < n_prompts; j++) {
+ float sim = llama_embd_similarity_cos(emb + i * n_embd, emb + j * n_embd, n_embd);
+ fprintf(stdout, "%6.2f ", sim);
+ }
+ fprintf(stdout, "\n");
}
- fprintf(stderr, "\n");
// clean up
llama_print_timings(ctx);
// #define GRIT_DEBUG
-static float dot_product(const std::vector<float> & v1, const std::vector<float> & v2) {
- float dot = 0.0f;
- for (uint64_t i = 0; i < v1.size(); ++i) {
- dot += v1[i] * v2[i];
- }
- return dot;
-}
-
-static float norm(const std::vector<float> & v) {
- return std::sqrt(dot_product(v, v));
-}
-
-static float cosine_similarity(const std::vector<float> & v1, const std::vector<float> & v2) {
- return dot_product(v1, v2) / (norm(v1) * norm(v2));
-}
-
static std::vector<std::vector<float>> encode(llama_context * ctx, const std::vector<std::string> & sentences, const std::string & instruction) {
std::vector<std::vector<float>> result;
const std::vector<std::vector<float>> d_rep = encode(ctx, documents, gritlm_instruction(""));
const std::vector<std::vector<float>> q_rep = encode(ctx, queries, gritlm_instruction(instruction));
- const float cosine_sim_q0_d0 = cosine_similarity(q_rep[0], d_rep[0]);
- const float cosine_sim_q0_d1 = cosine_similarity(q_rep[0], d_rep[1]);
- const float cosine_sim_q1_d0 = cosine_similarity(q_rep[1], d_rep[0]);
- const float cosine_sim_q1_d1 = cosine_similarity(q_rep[1], d_rep[1]);
+ const int n_embd = llama_n_embd(mdl);
+
+ const float cosine_sim_q0_d0 = llama_embd_similarity_cos(q_rep[0].data(), d_rep[0].data(), n_embd);
+ const float cosine_sim_q0_d1 = llama_embd_similarity_cos(q_rep[0].data(), d_rep[1].data(), n_embd);
+ const float cosine_sim_q1_d0 = llama_embd_similarity_cos(q_rep[1].data(), d_rep[0].data(), n_embd);
+ const float cosine_sim_q1_d1 = llama_embd_similarity_cos(q_rep[1].data(), d_rep[1].data(), n_embd);
std::printf("Cosine similarity between \"%.50s\" and \"%.50s\" is: %.3f\n", queries[0].c_str(), documents[0].c_str(), cosine_sim_q0_d0);
std::printf("Cosine similarity between \"%.50s\" and \"%.50s\" is: %.3f\n", queries[0].c_str(), documents[1].c_str(), cosine_sim_q0_d1);