llama : add llama_get_pooling_type function (#6862)

author Douglas Hanley <redacted>

Wed, 24 Apr 2024 13:10:07 +0000 (08:10 -0500)

committer GitHub <redacted>

Wed, 24 Apr 2024 13:10:07 +0000 (16:10 +0300)
author Douglas Hanley <redacted>
Wed, 24 Apr 2024 13:10:07 +0000 (08:10 -0500)
committer GitHub <redacted>
Wed, 24 Apr 2024 13:10:07 +0000 (16:10 +0300)
diff --git a/common/common.h b/common/common.h

index 157b54a3e9e0824ec4f02779ca5c4d193553c8fe..87361e8e915008489454797a92ce5f164b844dae 100644 (file)
--- a/common/common.h
+++ b/common/common.h
@@ -86,8 +86,8 @@ struct gpt_params {
  
      ggml_numa_strategy numa = GGML_NUMA_STRATEGY_DISABLED;
  
-    llama_rope_scaling_type rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED;
-    llama_pooling_type      pooling_type      = LLAMA_POOLING_TYPE_UNSPECIFIED; // pooling type for embeddings
+    enum llama_rope_scaling_type rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED;
+    enum llama_pooling_type      pooling_type      = LLAMA_POOLING_TYPE_UNSPECIFIED; // pooling type for embeddings
  
      // // sampling parameters
      struct llama_sampling_params sparams;
diff --git a/llama.cpp b/llama.cpp

index 3a4a03d8f29fb0be9b832c9f03442a089f939564..3a84b4916bd3080b4fa2eb73b274e79b68212f32 100644 (file)
--- a/llama.cpp
+++ b/llama.cpp
@@ -15599,6 +15599,10 @@ enum llama_rope_type llama_rope_type(const struct llama_model * model) {
      return LLAMA_ROPE_TYPE_NONE;
  }
  
+enum llama_pooling_type llama_pooling_type(const struct llama_context * ctx) {
+    return ctx->cparams.pooling_type;
+}
+
  int32_t llama_n_vocab(const struct llama_model * model) {
      return model->hparams.n_vocab;
  }
diff --git a/llama.h b/llama.h

index 7bfd13740cf25f157e3976e3747e69cb2e150c6c..0eb2a1e9ab0a24a4866ee683b0e3bd9bb452f97d 100644 (file)
--- a/llama.h
+++ b/llama.h
@@ -390,8 +390,10 @@ extern "C" {
      LLAMA_API uint32_t llama_n_ubatch   (const struct llama_context * ctx);
      LLAMA_API uint32_t llama_n_seq_max  (const struct llama_context * ctx);
  
-    LLAMA_API enum llama_vocab_type llama_vocab_type(const struct llama_model * model);
-    LLAMA_API enum llama_rope_type  llama_rope_type (const struct llama_model * model);
+    LLAMA_API enum llama_pooling_type llama_pooling_type(const struct llama_context * ctx);
+
+    LLAMA_API enum llama_vocab_type   llama_vocab_type  (const struct llama_model   * model);
+    LLAMA_API enum llama_rope_type    llama_rope_type   (const struct llama_model   * model);
  
      LLAMA_API int32_t llama_n_vocab    (const struct llama_model * model);
      LLAMA_API int32_t llama_n_ctx_train(const struct llama_model * model);
author	Douglas Hanley <redacted>
	Wed, 24 Apr 2024 13:10:07 +0000 (08:10 -0500)
committer	GitHub <redacted>
	Wed, 24 Apr 2024 13:10:07 +0000 (16:10 +0300)
common/common.h		patch | blob | history
llama.cpp		patch | blob | history
llama.h		patch | blob | history