llama : expose llama_model_n_head_kv in the API (#11997)

author Vitali Lovich <redacted>

Tue, 25 Feb 2025 09:29:33 +0000 (01:29 -0800)

committer GitHub <redacted>

Tue, 25 Feb 2025 09:29:33 +0000 (11:29 +0200)
author Vitali Lovich <redacted>
Tue, 25 Feb 2025 09:29:33 +0000 (01:29 -0800)
committer GitHub <redacted>
Tue, 25 Feb 2025 09:29:33 +0000 (11:29 +0200)
diff --git a/include/llama.h b/include/llama.h

index b0726cbe63ea6b1480ab4008a6c97d85c18c4a0e..479196026b93bf5f3a7600f5a4c100326ebad152 100644 (file)
--- a/include/llama.h
+++ b/include/llama.h
@@ -477,6 +477,7 @@ extern "C" {
      LLAMA_API int32_t llama_model_n_embd     (const struct llama_model * model);
      LLAMA_API int32_t llama_model_n_layer    (const struct llama_model * model);
      LLAMA_API int32_t llama_model_n_head     (const struct llama_model * model);
+    LLAMA_API int32_t llama_model_n_head_kv  (const struct llama_model * model);
  
      // Get the model's RoPE frequency scaling factor
      LLAMA_API float llama_model_rope_freq_scale_train(const struct llama_model * model);
diff --git a/src/llama-model.cpp b/src/llama-model.cpp

index f64c3afa029830da7ef77980460af2cb10f990e6..36a0a009c45672d1c4b17f2489b48c3759589810 100644 (file)
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -3838,6 +3838,10 @@ int32_t llama_model_n_head(const struct llama_model * model) {
      return model->hparams.n_head();
  }
  
+int32_t llama_model_n_head_kv(const struct llama_model * model) {
+    return model->hparams.n_head_kv();
+}
+
  // deprecated
  int32_t llama_n_ctx_train(const struct llama_model * model) {
      return llama_model_n_ctx_train(model);
author	Vitali Lovich <redacted>
	Tue, 25 Feb 2025 09:29:33 +0000 (01:29 -0800)
committer	GitHub <redacted>
	Tue, 25 Feb 2025 09:29:33 +0000 (11:29 +0200)
include/llama.h		patch \| blob \| history
src/llama-model.cpp		patch \| blob \| history