ggml : remove ggml_scratch (llama/10121)

author Georgi Gerganov <redacted>

Fri, 1 Nov 2024 10:58:45 +0000 (12:58 +0200)

committer Georgi Gerganov <redacted>

Mon, 4 Nov 2024 17:42:09 +0000 (19:42 +0200)
author Georgi Gerganov <redacted>
Fri, 1 Nov 2024 10:58:45 +0000 (12:58 +0200)
committer Georgi Gerganov <redacted>
Mon, 4 Nov 2024 17:42:09 +0000 (19:42 +0200)
diff --git a/include/ggml.h b/include/ggml.h

index e5862246c8c854d122b06e7d891454334faf3ba5..41df85557c1ef0721ba0b31530691472a0cbef1f 100644 (file)
--- a/include/ggml.h
+++ b/include/ggml.h
@@ -655,14 +655,6 @@ extern "C" {
          void *              abort_callback_data;
      };
  
-    // scratch buffer
-    // TODO: deprecate and remove
-    struct ggml_scratch {
-        size_t offs;
-        size_t size;
-        void * data;
-    };
-
      struct ggml_init_params {
          // memory pool
          size_t mem_size;   // bytes
@@ -766,7 +758,6 @@ extern "C" {
  
      GGML_API size_t  ggml_used_mem(const struct ggml_context * ctx);
  
-    GGML_API size_t  ggml_set_scratch (struct ggml_context * ctx, struct ggml_scratch scratch);
      GGML_API bool    ggml_get_no_alloc(struct ggml_context * ctx);
      GGML_API void    ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc);
  
diff --git a/src/ggml.c b/src/ggml.c

index 59f2ed043500a45ccab7d05e251af6fc2fb586ef..84f2c766bd8139523fb061a86b496705c4246da5 100644 (file)
--- a/src/ggml.c
+++ b/src/ggml.c
@@ -2018,15 +2018,11 @@ struct ggml_context {
      void * mem_buffer;
      bool   mem_buffer_owned;
      bool   no_alloc;
-    bool   no_alloc_save; // this is used to save the no_alloc state when using scratch buffers
  
      int    n_objects;
  
      struct ggml_object * objects_begin;
      struct ggml_object * objects_end;
-
-    struct ggml_scratch scratch;
-    struct ggml_scratch scratch_save;
  };
  
  struct ggml_context_container {
@@ -3879,12 +3875,9 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
          /*.mem_buffer         =*/ params.mem_buffer ? params.mem_buffer : ggml_aligned_malloc(mem_size),
          /*.mem_buffer_owned   =*/ params.mem_buffer ? false : true,
          /*.no_alloc           =*/ params.no_alloc,
-        /*.no_alloc_save      =*/ params.no_alloc,
          /*.n_objects          =*/ 0,
          /*.objects_begin      =*/ NULL,
          /*.objects_end        =*/ NULL,
-        /*.scratch            =*/ { 0, 0, NULL, },
-        /*.scratch_save       =*/ { 0, 0, NULL, },
      };
  
      GGML_ASSERT(ctx->mem_buffer != NULL);
@@ -3904,8 +3897,6 @@ void ggml_reset(struct ggml_context * ctx) {
      ctx->n_objects     = 0;
      ctx->objects_begin = NULL;
      ctx->objects_end   = NULL;
-    ctx->scratch       = (struct ggml_scratch) { 0, 0, NULL, };
-    ctx->scratch_save  = (struct ggml_scratch) { 0, 0, NULL, };
  }
  
  void ggml_free(struct ggml_context * ctx) {
@@ -3924,14 +3915,6 @@ size_t ggml_used_mem(const struct ggml_context * ctx) {
      return ctx->objects_end == NULL ? 0 : ctx->objects_end->offs + ctx->objects_end->size;
  }
  
-size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch) {
-    const size_t result = ctx->scratch.data ? ctx->scratch.offs : 0;
-
-    ctx->scratch = scratch;
-
-    return result;
-}
-
  bool ggml_get_no_alloc(struct ggml_context * ctx) {
      return ctx->no_alloc;
  }
@@ -3959,27 +3942,6 @@ size_t ggml_get_max_tensor_size(const struct ggml_context * ctx) {
      return max_size;
  }
  
-// IMPORTANT:
-// when creating "opt" tensors, always save and load the scratch buffer
-// this is an error prone process, but it is necessary to support inplace
-// operators when using scratch buffers
-// TODO: implement a better way
-static void ggml_scratch_save(struct ggml_context * ctx) {
-    // this is needed to allow opt tensors to store their data
-    // TODO: again, need to find a better way
-    ctx->no_alloc_save = ctx->no_alloc;
-    ctx->no_alloc      = false;
-
-    ctx->scratch_save = ctx->scratch;
-    ctx->scratch.data = NULL;
-}
-
-static void ggml_scratch_load(struct ggml_context * ctx) {
-    ctx->no_alloc = ctx->no_alloc_save;
-
-    ctx->scratch = ctx->scratch_save;
-}
-
  ////////////////////////////////////////////////////////////////////////////////
  
  static struct ggml_object * ggml_new_object(struct ggml_context * ctx, enum ggml_object_type type, size_t size) {
@@ -4060,29 +4022,13 @@ static struct ggml_tensor * ggml_new_tensor_impl(
      size_t obj_alloc_size = 0;
  
      if (view_src == NULL && !ctx->no_alloc) {
-        if (ctx->scratch.data != NULL) {
-            // allocate tensor data in the scratch buffer
-            if (ctx->scratch.offs + data_size > ctx->scratch.size) {
-                GGML_LOG_WARN("%s: not enough space in the scratch memory pool (needed %zu, available %zu)\n",
-                        __func__, ctx->scratch.offs + data_size, ctx->scratch.size);
-                assert(false);
-                return NULL;
-            }
-
-            data = (char * const) ctx->scratch.data + ctx->scratch.offs;
-
-            ctx->scratch.offs += data_size;
-        } else {
-            // allocate tensor data in the context's memory pool
-            obj_alloc_size = data_size;
-        }
+        // allocate tensor data in the context's memory pool
+        obj_alloc_size = data_size;
      }
  
      struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TYPE_TENSOR, GGML_TENSOR_SIZE + obj_alloc_size);
      GGML_ASSERT(obj_new);
  
-    // TODO: for recoverable errors, we would need to free the data allocated from the scratch buffer here
-
      struct ggml_tensor * const result = (struct ggml_tensor *)((char *)ctx->mem_buffer + obj_new->offs);
  
  #ifdef __clang__
@@ -4178,24 +4124,16 @@ struct ggml_tensor * ggml_new_tensor_4d(
  }
  
  struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value) {
-    ggml_scratch_save(ctx);
-
      struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 1);
  
-    ggml_scratch_load(ctx);
-
      ggml_set_i32(result, value);
  
      return result;
  }
  
  struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value) {
-    ggml_scratch_save(ctx);
-
      struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
  
-    ggml_scratch_load(ctx);
-
      ggml_set_f32(result, value);
  
      return result;
@@ -20263,7 +20201,6 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
      uint64_t size_eval = 0;
  
      // compute size of intermediate results
-    // TODO: does not take into account scratch buffers !!!!
      for (int i = 0; i < cgraph->n_nodes; ++i) {
          size_eval += ggml_nbytes_pad(cgraph->nodes[i]);
      }
author	Georgi Gerganov <redacted>
	Fri, 1 Nov 2024 10:58:45 +0000 (12:58 +0200)
committer	Georgi Gerganov <redacted>
	Mon, 4 Nov 2024 17:42:09 +0000 (19:42 +0200)
include/ggml.h		patch | blob | history
src/ggml.c		patch | blob | history