ggml : alloc ggml_contexts on the heap (whisper/2525)

author Georgi Gerganov <redacted>

Fri, 1 Nov 2024 08:23:05 +0000 (10:23 +0200)

committer Georgi Gerganov <redacted>

Fri, 1 Nov 2024 08:23:05 +0000 (10:23 +0200)
author Georgi Gerganov <redacted>
Fri, 1 Nov 2024 08:23:05 +0000 (10:23 +0200)
committer Georgi Gerganov <redacted>
Fri, 1 Nov 2024 08:23:05 +0000 (10:23 +0200)
diff --git a/include/ggml.h b/include/ggml.h

index de3c706fc251c4c518aa570dfb944d7d8034a133..e5862246c8c854d122b06e7d891454334faf3ba5 100644 (file)
--- a/include/ggml.h
+++ b/include/ggml.h
@@ -217,7 +217,6 @@
  
  #define GGML_MAX_DIMS           4
  #define GGML_MAX_PARAMS         2048
-#define GGML_MAX_CONTEXTS       64
  #define GGML_MAX_SRC            10
  #define GGML_MAX_N_THREADS      512
  #define GGML_MAX_OP_PARAMS      64
@@ -657,6 +656,7 @@ extern "C" {
      };
  
      // scratch buffer
+    // TODO: deprecate and remove
      struct ggml_scratch {
          size_t offs;
          size_t size;
@@ -760,8 +760,9 @@ extern "C" {
  
      // main
  
-    GGML_API struct ggml_context * ggml_init(struct ggml_init_params params);
-    GGML_API void                  ggml_free(struct ggml_context * ctx);
+    GGML_API struct ggml_context * ggml_init (struct ggml_init_params params);
+    GGML_API void                  ggml_reset(struct ggml_context * ctx);
+    GGML_API void                  ggml_free (struct ggml_context * ctx);
  
      GGML_API size_t  ggml_used_mem(const struct ggml_context * ctx);
  
diff --git a/src/ggml.c b/src/ggml.c

index 66df9a9c1e621a58009aa27874d0aa2a0e096f43..a4359e7dd05affff77b7578b037d33804925e369 100644 (file)
--- a/src/ggml.c
+++ b/src/ggml.c
@@ -306,6 +306,7 @@ void ggml_abort(const char * file, int line, const char * fmt, ...) {
  }
  
  #define GGML_DEBUG 0
+
  #define GGML_GELU_FP16
  #define GGML_GELU_QUICK_FP16
  
@@ -2014,7 +2015,7 @@ static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);
  
  struct ggml_context {
      size_t mem_size;
-    void* mem_buffer;
+    void * mem_buffer;
      bool   mem_buffer_owned;
      bool   no_alloc;
      bool   no_alloc_save; // this is used to save the no_alloc state when using scratch buffers
@@ -3263,7 +3264,6 @@ struct ggml_numa_nodes {
  //
  
  struct ggml_state {
-    struct ggml_context_container contexts[GGML_MAX_CONTEXTS];
      struct ggml_numa_nodes numa;
  };
  
@@ -3845,7 +3845,6 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
              const uint64_t t_start = ggml_time_us(); UNUSED(t_start);
  
              g_state = (struct ggml_state) {
-                /*.contexts =*/ { { 0 } },
                  /*.numa =*/ {
                      .n_nodes = 0,
                      .total_cpus = 0,
@@ -3864,26 +3863,9 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
          is_first_call = false;
      }
  
-    // find non-used context in g_state
-    struct ggml_context * ctx = NULL;
-
-    for (int i = 0; i < GGML_MAX_CONTEXTS; i++) {
-        if (!g_state.contexts[i].used) {
-            g_state.contexts[i].used = true;
-            ctx = &g_state.contexts[i].context;
-
-            GGML_PRINT_DEBUG("%s: found unused context %d\n", __func__, i);
-            break;
-        }
-    }
-
-    if (ctx == NULL) {
-        GGML_PRINT_DEBUG("%s: no unused context found\n", __func__);
-
-        ggml_critical_section_end();
+    ggml_critical_section_end();
  
-        return NULL;
-    }
+    struct ggml_context * ctx = GGML_MALLOC(sizeof(struct ggml_context));
  
      // allow to call ggml_init with 0 size
      if (params.mem_size == 0) {
@@ -3911,42 +3893,31 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
  
      GGML_PRINT_DEBUG("%s: context initialized\n", __func__);
  
-    ggml_critical_section_end();
-
      return ctx;
  }
  
-void ggml_free(struct ggml_context * ctx) {
+void ggml_reset(struct ggml_context * ctx) {
      if (ctx == NULL) {
          return;
      }
  
-    // make this function thread safe
-    ggml_critical_section_start();
-
-    bool found = false;
-
-    for (int i = 0; i < GGML_MAX_CONTEXTS; i++) {
-        if (&g_state.contexts[i].context == ctx) {
-            g_state.contexts[i].used = false;
-
-            GGML_PRINT_DEBUG("%s: context %d has been freed. memory used = %zu\n",
-                    __func__, i, ggml_used_mem(ctx));
-
-            if (ctx->mem_buffer_owned) {
-                ggml_aligned_free(ctx->mem_buffer, ctx->mem_size);
-            }
+    ctx->n_objects     = 0;
+    ctx->objects_begin = NULL;
+    ctx->objects_end   = NULL;
+    ctx->scratch       = (struct ggml_scratch) { 0, 0, NULL, };
+    ctx->scratch_save  = (struct ggml_scratch) { 0, 0, NULL, };
+}
  
-            found = true;
-            break;
-        }
+void ggml_free(struct ggml_context * ctx) {
+    if (ctx == NULL) {
+        return;
      }
  
-    if (!found) {
-        GGML_PRINT_DEBUG("%s: context not found\n", __func__);
+    if (ctx->mem_buffer_owned) {
+        ggml_aligned_free(ctx->mem_buffer, ctx->mem_size);
      }
  
-    ggml_critical_section_end();
+    GGML_FREE(ctx);
  }
  
  size_t ggml_used_mem(const struct ggml_context * ctx) {
author	Georgi Gerganov <redacted>
	Fri, 1 Nov 2024 08:23:05 +0000 (10:23 +0200)
committer	Georgi Gerganov <redacted>
	Fri, 1 Nov 2024 08:23:05 +0000 (10:23 +0200)
include/ggml.h		patch | blob | history
src/ggml.c		patch | blob | history