llama : add gguf_remove_key + remove split meta during quantize (llama/6591)

author jiez <redacted>

Fri, 12 Apr 2024 10:45:06 +0000 (18:45 +0800)

committer Georgi Gerganov <redacted>

Mon, 13 May 2024 08:02:26 +0000 (11:02 +0300)
author jiez <redacted>
Fri, 12 Apr 2024 10:45:06 +0000 (18:45 +0800)
committer Georgi Gerganov <redacted>
Mon, 13 May 2024 08:02:26 +0000 (11:02 +0300)
diff --git a/ggml.c b/ggml.c

index 3256dda8a08e65f550fca7e3006eba1d78252274..2c4b8ec4ff43e462d40b3d39c4bb9fa7e9b85211 100644 (file)
--- a/ggml.c
+++ b/ggml.c
@@ -20621,6 +20621,32 @@ static bool gguf_fread_str(FILE * file, struct gguf_str * p, size_t * offset) {
      return ok;
  }
  
+// Releases the heap memory owned by one key/value pair:
+//   - the key string
+//   - the string payload, when the value is a GGUF_TYPE_STRING
+//   - the array buffer (and each element string, for arrays of strings), when the value is a GGUF_TYPE_ARRAY
+// The gguf_kv struct itself is not freed and its pointers are not reset.
+static void gguf_free_kv(struct gguf_kv * kv) {
+    if (kv->key.data) {
+        GGML_FREE(kv->key.data);
+    }
+
+    if (kv->type == GGUF_TYPE_STRING && kv->value.str.data) {
+        GGML_FREE(kv->value.str.data);
+    }
+
+    // everything below only concerns arrays that actually have a buffer
+    if (kv->type != GGUF_TYPE_ARRAY || !kv->value.arr.data) {
+        return;
+    }
+
+    if (kv->value.arr.type == GGUF_TYPE_STRING) {
+        // string elements own their own allocations - release them before the buffer that holds them
+        struct gguf_str * strs = (struct gguf_str *) kv->value.arr.data;
+        for (uint64_t k = 0; k < kv->value.arr.n; ++k) {
+            if (strs[k].data) {
+                GGML_FREE(strs[k].data);
+            }
+        }
+    }
+
+    GGML_FREE(kv->value.arr.data);
+}
+
  struct gguf_context * gguf_init_empty(void) {
      struct gguf_context * ctx = GGML_ALIGNED_MALLOC(sizeof(struct gguf_context));
  
@@ -20970,31 +20996,7 @@ void gguf_free(struct gguf_context * ctx) {
      if (ctx->kv) {
          // free string memory - not great..
          for (uint64_t i = 0; i < ctx->header.n_kv; ++i) {
-            struct gguf_kv * kv = &ctx->kv[i];
-
-            if (kv->key.data) {
-                GGML_FREE(kv->key.data);
-            }
-
-            if (kv->type == GGUF_TYPE_STRING) {
-                if (kv->value.str.data) {
-                    GGML_FREE(kv->value.str.data);
-                }
-            }
-
-            if (kv->type == GGUF_TYPE_ARRAY) {
-                if (kv->value.arr.data) {
-                    if (kv->value.arr.type == GGUF_TYPE_STRING) {
-                        for (uint64_t j = 0; j < kv->value.arr.n; ++j) {
-                            struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[j];
-                            if (str->data) {
-                                GGML_FREE(str->data);
-                            }
-                        }
-                    }
-                    GGML_FREE(kv->value.arr.data);
-                }
-            }
+            gguf_free_kv(&ctx->kv[i]);
          }
  
          GGML_FREE(ctx->kv);
@@ -21219,6 +21221,19 @@ static int gguf_get_or_add_key(struct gguf_context * ctx, const char * key) {
      return n_kv;
  }
  
+// Removes the key/value pair stored under `key`, if there is one; does nothing otherwise.
+// The memory owned by the removed pair is freed and the pairs stored after it are moved
+// down by one slot, so indices obtained before the call may no longer refer to the same key.
+void gguf_remove_key(struct gguf_context * ctx, const char * key) {
+    const int idx = gguf_find_key(ctx, key);
+    if (idx < 0) {
+        return;
+    }
+
+    const int n_kv = gguf_get_n_kv(ctx);
+
+    gguf_free_kv(&ctx->kv[idx]);
+
+    // close the gap left by the removed pair
+    for (int i = idx; i < n_kv - 1; ++i) {
+        ctx->kv[i] = ctx->kv[i + 1];
+    }
+    ctx->header.n_kv--;
+
+    if (n_kv == 1) {
+        // the array is now empty: release it explicitly instead of calling realloc(ptr, 0),
+        // whose result is implementation-defined
+        GGML_FREE(ctx->kv);
+        ctx->kv = NULL;
+        return;
+    }
+
+    // shrink the array; on failure the old (larger) buffer is still valid, so keep using it
+    // rather than overwriting the only pointer to it with NULL
+    void * shrunk = realloc(ctx->kv, (size_t) (n_kv - 1) * sizeof(struct gguf_kv));
+    if (shrunk != NULL) {
+        ctx->kv = shrunk;
+    }
+}
+
  void gguf_set_val_u8(struct gguf_context * ctx, const char * key, uint8_t val) {
      const int idx = gguf_get_or_add_key(ctx, key);
  
diff --git a/ggml.h b/ggml.h

index fbc34f0c9d0d089da5e9673d513e7e8887434fed..1a776ca83e4b0c601c7e1d5802aef1cd435026b7 100644 (file)
--- a/ggml.h
+++ b/ggml.h
@@ -2298,6 +2298,9 @@ extern "C" {
      GGML_API char *         gguf_get_tensor_name  (const struct gguf_context * ctx, int i);
      GGML_API enum ggml_type gguf_get_tensor_type  (const struct gguf_context * ctx, int i);
  
+    // removes key if it exists, freeing the memory owned by its key/value pair
+    // pairs stored after it move down by one index, so key ids obtained earlier (e.g. from gguf_find_key) may no longer be valid
+    GGML_API void gguf_remove_key(struct gguf_context * ctx, const char * key);
+
      // overrides existing values or adds a new one
      GGML_API void gguf_set_val_u8  (struct gguf_context * ctx, const char * key, uint8_t  val);
      GGML_API void gguf_set_val_i8  (struct gguf_context * ctx, const char * key, int8_t   val);
author	jiez <redacted>
	Fri, 12 Apr 2024 10:45:06 +0000 (18:45 +0800)
committer	Georgi Gerganov <redacted>
	Mon, 13 May 2024 08:02:26 +0000 (11:02 +0300)