llama : add functions to get the model's metadata (#4013)

author slaren <redacted>

Fri, 17 Nov 2023 15:17:37 +0000 (16:17 +0100)

committer GitHub <redacted>

Fri, 17 Nov 2023 15:17:37 +0000 (17:17 +0200)
author slaren <redacted>
Fri, 17 Nov 2023 15:17:37 +0000 (16:17 +0100)
committer GitHub <redacted>
Fri, 17 Nov 2023 15:17:37 +0000 (17:17 +0200)
diff --git a/ggml.c b/ggml.c

index c7086ba844c6006584cf38e8857eb3c3ab519516..f92292b39c635e7539031e75bb8b0274f3356fae 100644 (file)
--- a/ggml.c
+++ b/ggml.c
@@ -18452,24 +18452,29 @@ int gguf_find_key(const struct gguf_context * ctx, const char * key) {
  }
  
  const char * gguf_get_key(const struct gguf_context * ctx, int key_id) {
+    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
      return ctx->kv[key_id].key.data;
  }
  
  enum gguf_type gguf_get_kv_type(const struct gguf_context * ctx, int key_id) {
+    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
      return ctx->kv[key_id].type;
  }
  
  enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int key_id) {
+    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
      GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY);
      return ctx->kv[key_id].value.arr.type;
  }
  
  const void * gguf_get_arr_data(const struct gguf_context * ctx, int key_id) {
+    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
      GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY);
      return ctx->kv[key_id].value.arr.data;
  }
  
  const char * gguf_get_arr_str(const struct gguf_context * ctx, int key_id, int i) {
+    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
      GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY);
      struct gguf_kv * kv = &ctx->kv[key_id];
      struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[i];
@@ -18477,70 +18482,90 @@ const char * gguf_get_arr_str(const struct gguf_context * ctx, int key_id, int i
  }
  
  int gguf_get_arr_n(const struct gguf_context * ctx, int key_id) {
+    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
      GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY);
      return ctx->kv[key_id].value.arr.n;
  }
  
  uint8_t gguf_get_val_u8(const struct gguf_context * ctx, int key_id) {
+    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
      GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_UINT8);
      return ctx->kv[key_id].value.uint8;
  }
  
  int8_t gguf_get_val_i8(const struct gguf_context * ctx, int key_id) {
+    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
      GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_INT8);
      return ctx->kv[key_id].value.int8;
  }
  
  uint16_t gguf_get_val_u16(const struct gguf_context * ctx, int key_id) {
+    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
      GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_UINT16);
      return ctx->kv[key_id].value.uint16;
  }
  
  int16_t gguf_get_val_i16(const struct gguf_context * ctx, int key_id) {
+    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
      GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_INT16);
      return ctx->kv[key_id].value.int16;
  }
  
  uint32_t gguf_get_val_u32(const struct gguf_context * ctx, int key_id) {
+    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
      GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_UINT32);
      return ctx->kv[key_id].value.uint32;
  }
  
  int32_t gguf_get_val_i32(const struct gguf_context * ctx, int key_id) {
+    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
      GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_INT32);
      return ctx->kv[key_id].value.int32;
  }
  
  float gguf_get_val_f32(const struct gguf_context * ctx, int key_id) {
+    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
      GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_FLOAT32);
      return ctx->kv[key_id].value.float32;
  }
  
  uint64_t gguf_get_val_u64(const struct gguf_context * ctx, int key_id) {
+    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
      GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_UINT64);
      return ctx->kv[key_id].value.uint64;
  }
  
  int64_t gguf_get_val_i64(const struct gguf_context * ctx, int key_id) {
+    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
      GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_INT64);
      return ctx->kv[key_id].value.int64;
  }
  
  double gguf_get_val_f64(const struct gguf_context * ctx, int key_id) {
+    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
      GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_FLOAT64);
      return ctx->kv[key_id].value.float64;
  }
  
  bool gguf_get_val_bool(const struct gguf_context * ctx, int key_id) {
+    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
      GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_BOOL);
      return ctx->kv[key_id].value.bool_;
  }
  
  const char * gguf_get_val_str(const struct gguf_context * ctx, int key_id) {
+    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
      GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_STRING);
      return ctx->kv[key_id].value.str.data;
  }
  
+const void * gguf_get_val_data(const struct gguf_context * ctx, int key_id) {
+    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
+    GGML_ASSERT(ctx->kv[key_id].type != GGUF_TYPE_ARRAY);
+    GGML_ASSERT(ctx->kv[key_id].type != GGUF_TYPE_STRING);
+    return &ctx->kv[key_id].value;
+}
+
  int gguf_get_n_tensors(const struct gguf_context * ctx) {
      return ctx->header.n_tensors;
  }
diff --git a/ggml.h b/ggml.h

index 8e6b646066b7a488197becae814d17e504916194..f2fce0f22d357af99af74d6ea53402f04c9dda1d 100644 (file)
--- a/ggml.h
+++ b/ggml.h
@@ -2045,6 +2045,7 @@ extern "C" {
      GGML_API double       gguf_get_val_f64 (const struct gguf_context * ctx, int key_id);
      GGML_API bool         gguf_get_val_bool(const struct gguf_context * ctx, int key_id);
      GGML_API const char * gguf_get_val_str (const struct gguf_context * ctx, int key_id);
+    GGML_API const void * gguf_get_val_data(const struct gguf_context * ctx, int key_id);
      GGML_API int          gguf_get_arr_n   (const struct gguf_context * ctx, int key_id);
      GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int key_id);
      GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int key_id, int i);
diff --git a/llama.cpp b/llama.cpp

index 8500b20cbaf09f567925f6825b3710268c1f2eb8..3cc3fc9f0f3fb4ede276ebc964fcad078cf2c975 100644 (file)
--- a/llama.cpp
+++ b/llama.cpp
@@ -604,6 +604,60 @@ static int8_t llama_rope_scaling_type_from_string(const std::string & name) {
      return LLAMA_ROPE_SCALING_UNSPECIFIED;
  }
  
+static std::string gguf_data_to_str(enum gguf_type type, const void * data, int i) {
+    switch (type) {
+        case GGUF_TYPE_UINT8:   return std::to_string(((const uint8_t  *)data)[i]);
+        case GGUF_TYPE_INT8:    return std::to_string(((const int8_t   *)data)[i]);
+        case GGUF_TYPE_UINT16:  return std::to_string(((const uint16_t *)data)[i]);
+        case GGUF_TYPE_INT16:   return std::to_string(((const int16_t  *)data)[i]);
+        case GGUF_TYPE_UINT32:  return std::to_string(((const uint32_t *)data)[i]);
+        case GGUF_TYPE_INT32:   return std::to_string(((const int32_t  *)data)[i]);
+        case GGUF_TYPE_UINT64:  return std::to_string(((const uint64_t *)data)[i]);
+        case GGUF_TYPE_INT64:   return std::to_string(((const int64_t  *)data)[i]);
+        case GGUF_TYPE_FLOAT32: return std::to_string(((const float    *)data)[i]);
+        case GGUF_TYPE_FLOAT64: return std::to_string(((const double   *)data)[i]);
+        case GGUF_TYPE_BOOL:    return ((const bool *)data)[i] ? "true" : "false";
+        default:                return format("unknown type %d", type);
+    }
+}
+
+static std::string gguf_kv_to_str(struct gguf_context * ctx_gguf, int i) {
+    const enum gguf_type type = gguf_get_kv_type(ctx_gguf, i);
+
+    switch (type) {
+        case GGUF_TYPE_STRING:
+            return gguf_get_val_str(ctx_gguf, i);
+        case GGUF_TYPE_ARRAY:
+            {
+                const enum gguf_type arr_type = gguf_get_arr_type(ctx_gguf, i);
+                int arr_n = gguf_get_arr_n(ctx_gguf, i);
+                const void * data = gguf_get_arr_data(ctx_gguf, i);
+                std::stringstream ss;
+                ss << "[";
+                for (int j = 0; j < arr_n; j++) {
+                    if (arr_type == GGUF_TYPE_STRING) {
+                        std::string val = gguf_get_arr_str(ctx_gguf, i, j);
+                        // escape backslashes and double quotes
+                        replace_all(val, "\\", "\\\\");
+                        replace_all(val, "\"", "\\\"");
+                        ss << '"' << val << '"';
+                    } else if (arr_type == GGUF_TYPE_ARRAY) {
+                        ss << "???";
+                    } else {
+                        ss << gguf_data_to_str(arr_type, data, j);
+                    }
+                    if (j < arr_n - 1) {
+                        ss << ", ";
+                    }
+                }
+                ss << "]";
+                return ss.str();
+            }
+        default:
+            return gguf_data_to_str(type, gguf_get_val_data(ctx_gguf, i), 0);
+    }
+}
+
  //
  // ggml helpers
  //
@@ -1327,6 +1381,9 @@ struct llama_model {
  
      int n_gpu_layers;
  
+    // gguf metadata: key -> value converted to a string
+    std::unordered_map<std::string, std::string> gguf_kv;
+
      // context
      struct ggml_context * ctx = NULL;
  
@@ -1785,10 +1842,10 @@ struct llama_model_loader {
                  case GGML_TYPE_Q5_K: ftype = LLAMA_FTYPE_MOSTLY_Q5_K_M; break;
                  case GGML_TYPE_Q6_K: ftype = LLAMA_FTYPE_MOSTLY_Q6_K;   break;
                  default:
-                     {
-                         LLAMA_LOG_WARN("%s: unknown type %s\n", __func__, ggml_type_name(type_max));
-                         ftype = LLAMA_FTYPE_ALL_F32;
-                     } break;
+                    {
+                        LLAMA_LOG_WARN("%s: unknown type %s\n", __func__, ggml_type_name(type_max));
+                        ftype = LLAMA_FTYPE_ALL_F32;
+                    } break;
              }
  
              // this is a way to mark that we have "guessed" the file type
@@ -1802,10 +1859,20 @@ struct llama_model_loader {
              }
  
              for (int i = 0; i < n_kv; i++) {
-                const char * name         = gguf_get_key(ctx_gguf, i);
-                const enum gguf_type type = gguf_get_kv_type(ctx_gguf, i);
+                const char * name           = gguf_get_key(ctx_gguf, i);
+                const enum gguf_type type   = gguf_get_kv_type(ctx_gguf, i);
+                const std::string type_name =
+                    type == GGUF_TYPE_ARRAY
+                    ? format("%s[%s,%d]", gguf_type_name(type), gguf_type_name(gguf_get_arr_type(ctx_gguf, i)), gguf_get_arr_n(ctx_gguf, i))
+                    : gguf_type_name(type);
+
+                std::string value          = gguf_kv_to_str(ctx_gguf, i);
+                const size_t MAX_VALUE_LEN = 40;
+                if (value.size() > MAX_VALUE_LEN) {
+                    value = format("%s...", value.substr(0, MAX_VALUE_LEN - 3).c_str());
+                }
  
-                LLAMA_LOG_INFO("%s: - kv %3d: %42s %-8s\n", __func__, i, name, gguf_type_name(type));
+                LLAMA_LOG_INFO("%s: - kv %3d: %42s %-16s = %s\n", __func__, i, name, type_name.c_str(), value.c_str());
              }
  
              // print type counts
@@ -2100,6 +2167,17 @@ static void llm_load_hparams(
  
      auto & hparams = model.hparams;
  
+    // get metadata as strings (array-typed values are skipped)
+    for (int i = 0; i < gguf_get_n_kv(ctx); i++) {
+        enum gguf_type type = gguf_get_kv_type(ctx, i);
+        if (type == GGUF_TYPE_ARRAY) {
+            continue;
+        }
+        const char * name = gguf_get_key(ctx, i);
+        const std::string value = gguf_kv_to_str(ctx, i);
+        model.gguf_kv.emplace(name, value);
+    }
+
      // get general kv
      GGUF_GET_KEY(ctx, model.name, gguf_get_val_str, GGUF_TYPE_STRING, false, kv(LLM_KV_GENERAL_NAME));
  
@@ -8671,6 +8749,45 @@ float llama_rope_freq_scale_train(const struct llama_model * model) {
      return model->hparams.rope_freq_scale_train;
  }
  
+int llama_model_meta_val_str(const struct llama_model * model, const char * key, char * buf, size_t buf_size) {
+    const auto & it = model->gguf_kv.find(key);
+    if (it == model->gguf_kv.end()) {
+        if (buf_size > 0) {
+            buf[0] = '\0';
+        }
+        return -1;
+    }
+    return snprintf(buf, buf_size, "%s", it->second.c_str());
+}
+
+int llama_model_meta_count(const struct llama_model * model) {
+    return (int)model->gguf_kv.size();
+}
+
+int llama_model_meta_key_by_index(const struct llama_model * model, int i, char * buf, size_t buf_size) {
+    if (i < 0 || i >= (int)model->gguf_kv.size()) {
+        if (buf_size > 0) {
+            buf[0] = '\0';
+        }
+        return -1;
+    }
+    auto it = model->gguf_kv.begin();
+    std::advance(it, i);
+    return snprintf(buf, buf_size, "%s", it->first.c_str());
+}
+
+int llama_model_meta_val_str_by_index(const struct llama_model * model, int i, char * buf, size_t buf_size) {
+    if (i < 0 || i >= (int)model->gguf_kv.size()) {
+        if (buf_size > 0) {
+            buf[0] = '\0';
+        }
+        return -1;
+    }
+    auto it = model->gguf_kv.begin();
+    std::advance(it, i);
+    return snprintf(buf, buf_size, "%s", it->second.c_str());
+}
+
  int llama_model_desc(const struct llama_model * model, char * buf, size_t buf_size) {
      return snprintf(buf, buf_size, "%s %s %s",
              llama_model_arch_name(model->arch).c_str(),
diff --git a/llama.h b/llama.h

index 0a5d6c60ff15bf9d949a2a65f281180b727143f1..70e8fda4bf1b3522b35dc14ea11120824a0095a8 100644 (file)
--- a/llama.h
+++ b/llama.h
@@ -301,6 +301,23 @@ extern "C" {
      // Get the model's RoPE frequency scaling factor
      LLAMA_API float llama_rope_freq_scale_train(const struct llama_model * model);
  
+    // Functions to access the model's GGUF metadata scalar values
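+    // - The functions return the length of the full string on success (snprintf semantics: the value may be >= buf_size if the output was truncated), or -1 on failure
+    // - If buf_size > 0, the output string is always null-terminated, and is set to the empty string on failure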
+    // - GGUF array values are not supported by these functions
+
+    // Get metadata value as a string by key name
+    LLAMA_API int llama_model_meta_val_str(const struct llama_model * model, const char * key, char * buf, size_t buf_size);
+
+    // Get the number of metadata key/value pairs
+    LLAMA_API int llama_model_meta_count(const struct llama_model * model);
+
+    // Get metadata key name by index
+    LLAMA_API int llama_model_meta_key_by_index(const struct llama_model * model, int i, char * buf, size_t buf_size);
+
+    // Get metadata value as a string by index
+    LLAMA_API int llama_model_meta_val_str_by_index(const struct llama_model * model, int i, char * buf, size_t buf_size);
+
      // Get a string describing the model type
      LLAMA_API int llama_model_desc(const struct llama_model * model, char * buf, size_t buf_size);
author	slaren <redacted>
	Fri, 17 Nov 2023 15:17:37 +0000 (16:17 +0100)
committer	GitHub <redacted>
	Fri, 17 Nov 2023 15:17:37 +0000 (17:17 +0200)
ggml.c		patch \| blob \| history
ggml.h		patch \| blob \| history
llama.cpp		patch \| blob \| history
llama.h		patch \| blob \| history