tests: add tests for GGUF (llama/10830)

author Johannes Gäßler <redacted>

Tue, 17 Dec 2024 18:09:35 +0000 (19:09 +0100)

committer Georgi Gerganov <redacted>

Sat, 4 Jan 2025 08:45:01 +0000 (10:45 +0200)
author Johannes Gäßler <redacted>
Tue, 17 Dec 2024 18:09:35 +0000 (19:09 +0100)
committer Georgi Gerganov <redacted>
Sat, 4 Jan 2025 08:45:01 +0000 (10:45 +0200)
diff --git a/ggml/src/ggml-impl.h b/ggml/src/ggml-impl.h

index f961134edd73584204be3cd65cbcea9831f0a5e5..549772c57c90a62085395b1e27cfac99b9f5a779 100644 (file)
--- a/ggml/src/ggml-impl.h
+++ b/ggml/src/ggml-impl.h
@@ -551,6 +551,22 @@ static inline ggml_bf16_t ggml_compute_fp32_to_bf16(float s) {
  #define GGML_FP32_TO_BF16(x) ggml_compute_fp32_to_bf16(x)
  #define GGML_BF16_TO_FP32(x) ggml_compute_bf16_to_fp32(x)
  
+// expose GGUF internals for test code
+
+GGML_API size_t gguf_type_size(enum gguf_type type);
+
+GGML_API struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params);
+
+struct gguf_buf {
+    void * data;
+    size_t size;
+    size_t offset;
+};
+GGML_API struct gguf_buf gguf_buf_init(size_t size);
+GGML_API void gguf_buf_free(struct gguf_buf buf);
+
+GGML_API void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf * buf, bool only_meta);
+
  #ifdef __cplusplus
  }
  #endif
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c

index 030d93a5177c0b7589795e0d89552e24a2a9b23c..0efd2b2ebf780993fc819607b78bd0ffd90764df 100644 (file)
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -6489,7 +6489,7 @@ struct gguf_context {
      void * data;
  };
  
-static size_t gguf_type_size(enum gguf_type type) {
+size_t gguf_type_size(enum gguf_type type) {
      GGML_ASSERT(0 <= type && type < GGUF_TYPE_COUNT);
      return GGUF_TYPE_SIZE[type];
  }
@@ -6617,13 +6617,7 @@ struct gguf_context * gguf_init_empty(void) {
      return ctx;
  }
  
-struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params) {
-    FILE * file = ggml_fopen(fname, "rb");
-    if (!file) {
-        fprintf(stderr, "%s: failed to open '%s': '%s'\n", __func__, fname, strerror(errno));
-        return NULL;
-    }
-
+struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params) {
      // offset from start of file
      size_t offset = 0;
  
@@ -6636,7 +6630,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
          for (uint32_t i = 0; i < sizeof(magic); i++) {
              if (magic[i] != GGUF_MAGIC[i]) {
                  fprintf(stderr, "%s: invalid magic characters '%c%c%c%c'\n", __func__, magic[0], magic[1], magic[2], magic[3]);
-                fclose(file);
                  return NULL;
              }
          }
@@ -6647,7 +6640,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
      struct gguf_context * ctx = calloc(1, sizeof(struct gguf_context));
      if (!ctx) {
          fprintf(stderr, "%s: failed to allocate memory for context\n", __func__);
-        fclose(file);
          return NULL;
      }
  
@@ -6665,7 +6657,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
  
          if (ctx->header.version == 1) {
              fprintf(stderr, "%s: GGUFv1 is no longer supported. please use a more up-to-date version\n", __func__);
-            fclose(file);
              gguf_free(ctx);
              return NULL;
          }
@@ -6678,7 +6669,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
  
          if (!ok) {
              fprintf(stderr, "%s: failed to read header\n", __func__);
-            fclose(file);
              gguf_free(ctx);
              return NULL;
          }
@@ -6688,12 +6678,13 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
      {
          const uint64_t n_kv = ctx->header.n_kv;
  
-        ctx->kv = calloc(n_kv, sizeof(struct gguf_kv));
-        if (!ctx->kv) {
-            fprintf(stderr, "%s: failed to allocate memory for kv pairs\n", __func__);
-            fclose(file);
-            gguf_free(ctx);
-            return NULL;
+        if (n_kv > 0) {
+            ctx->kv = calloc(n_kv, sizeof(struct gguf_kv));
+            if (!ctx->kv) {
+                fprintf(stderr, "%s: failed to allocate memory for kv pairs\n", __func__);
+                gguf_free(ctx);
+                return NULL;
+            }
          }
  
          for (uint64_t i = 0; i < n_kv; ++i) {
@@ -6740,7 +6731,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
                                      // prevent from integer overflow in the malloc below
                                      if (kv->value.arr.n >= SIZE_MAX/gguf_type_size(kv->value.arr.type)) {
                                          fprintf(stderr, "%s: array size is too large (%" PRIu64 ")\n", __func__, kv->value.arr.n);
-                                        fclose(file);
                                          gguf_free(ctx);
                                          return NULL;
                                      }
@@ -6748,7 +6738,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
                                      kv->value.arr.data = calloc(kv->value.arr.n, gguf_type_size(kv->value.arr.type));
                                      if (!kv->value.arr.data) {
                                          fprintf(stderr, "%s: failed to allocate memory for array\n", __func__);
-                                        fclose(file);
                                          gguf_free(ctx);
                                          return NULL;
                                      }
@@ -6760,7 +6749,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
                                      // prevent from integer overflow in the malloc below
                                      if (kv->value.arr.n >= SIZE_MAX/sizeof(struct gguf_str)) {
                                          fprintf(stderr, "%s: array size is too large (%" PRIu64 ")\n", __func__, kv->value.arr.n);
-                                        fclose(file);
                                          gguf_free(ctx);
                                          return NULL;
                                      }
@@ -6768,7 +6756,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
                                      kv->value.arr.data = calloc(kv->value.arr.n, sizeof(struct gguf_str));
                                      if (!kv->value.arr.data) {
                                          fprintf(stderr, "%s: failed to allocate memory for array\n", __func__);
-                                        fclose(file);
                                          gguf_free(ctx);
                                          return NULL;
                                      }
@@ -6799,7 +6786,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
  
          if (!ok) {
              fprintf(stderr, "%s: failed to read key-value pairs\n", __func__);
-            fclose(file);
              gguf_free(ctx);
              return NULL;
          }
@@ -6810,7 +6796,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
          ctx->infos = calloc(ctx->header.n_tensors, sizeof(struct gguf_tensor_info));
          if (!ctx->infos) {
              fprintf(stderr, "%s: failed to allocate memory for tensor infos\n", __func__);
-            fclose(file);
              gguf_free(ctx);
              return NULL;
          }
@@ -6846,7 +6831,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
  
              if (!ok) {
                  fprintf(stderr, "%s: failed to read tensor info\n", __func__);
-                fclose(file);
                  gguf_free(ctx);
                  return NULL;
              }
@@ -6889,7 +6873,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
                  // this tensor type support have been removed:
                  fprintf(stderr, "%s: tensor '%s' of type %d: %s\n",
                          __func__, info->name.data, (int) info->type, ggml_type_name(info->type));
-                fclose(file);
                  gguf_free(ctx);
                  return NULL;
              }
@@ -6897,7 +6880,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
              if (ne % ggml_blck_size(info->type) != 0) {
                  fprintf(stderr, "%s: tensor '%s' of type %d (%s) number of elements (%" PRId64 ") is not a multiple of block size (%" PRId64 ")\n",
                          __func__, info->name.data, (int) info->type, ggml_type_name(info->type), ne, ggml_blck_size(info->type));
-                fclose(file);
                  gguf_free(ctx);
                  return NULL;
              }
@@ -6929,7 +6911,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
          *params.ctx = ggml_init(pdata);
          if (*params.ctx == NULL) {
              fprintf(stderr, "%s: failed to initialize context\n", __func__);
-            fclose(file);
              gguf_free(ctx);
              return NULL;
          }
@@ -6948,7 +6929,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
  
              if (!ok) {
                  fprintf(stderr, "%s: failed to read tensor data\n", __func__);
-                fclose(file);
                  ggml_free(ctx_data);
                  gguf_free(ctx);
                  return NULL;
@@ -6987,7 +6967,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
  
          if (!ok) {
              fprintf(stderr, "%s: failed to read the tensor data\n", __func__);
-            fclose(file);
              ggml_free(ctx_data);
              gguf_free(ctx);
              return NULL;
@@ -6996,11 +6975,21 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
          ggml_set_no_alloc(ctx_data, params.no_alloc);
      }
  
-    fclose(file);
-
      return ctx;
  }
  
+struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params) {
+    FILE * file = ggml_fopen(fname, "rb");
+    if (!file) {
+        fprintf(stderr, "%s: failed to open '%s': '%s'\n", __func__, fname, strerror(errno));
+        return NULL;
+    }
+
+    struct gguf_context * result = gguf_init_from_file_impl(file, params);
+    fclose(file);
+    return result;
+}
+
  void gguf_free(struct gguf_context * ctx) {
      if (ctx == NULL) {
          return;
@@ -7460,13 +7449,7 @@ void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const vo
  //    fwrite(val, sizeof(char), size, file);
  //}
  
-struct gguf_buf {
-    void * data;
-    size_t size;
-    size_t offset;
-};
-
-static struct gguf_buf gguf_buf_init(size_t size) {
+struct gguf_buf gguf_buf_init(size_t size) {
      struct gguf_buf buf = {
          /*buf.data   =*/ size == 0 ? NULL : GGML_CALLOC(1, size),
          /*buf.size   =*/ size,
@@ -7476,7 +7459,7 @@ static struct gguf_buf gguf_buf_init(size_t size) {
      return buf;
  }
  
-static void gguf_buf_free(struct gguf_buf buf) {
+void gguf_buf_free(struct gguf_buf buf) {
      if (buf.data) {
          GGML_FREE(buf.data);
      }
@@ -7514,7 +7497,7 @@ static void gguf_bwrite_el(struct gguf_buf * buf, const void * val, size_t el_si
      buf->offset += el_size;
  }
  
-static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf * buf, bool only_meta) {
+void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf * buf, bool only_meta) {
      // write header
      gguf_bwrite_el(buf, &ctx->header.magic,     sizeof(ctx->header.magic));
      gguf_bwrite_el(buf, &ctx->header.version,   sizeof(ctx->header.version));
author	Johannes Gäßler <redacted>
	Tue, 17 Dec 2024 18:09:35 +0000 (19:09 +0100)
committer	Georgi Gerganov <redacted>
	Sat, 4 Jan 2025 08:45:01 +0000 (10:45 +0200)
ggml/src/ggml-impl.h		patch | blob | history
ggml/src/ggml.c		patch | blob | history