metal : remove unused `n_buffers` and `buffers` (llama/5129)

author Paul Tsochantaris <redacted>

Fri, 26 Jan 2024 12:16:07 +0000 (12:16 +0000)

committer Georgi Gerganov <redacted>

Sat, 27 Jan 2024 15:19:52 +0000 (17:19 +0200)
author Paul Tsochantaris <redacted>
Fri, 26 Jan 2024 12:16:07 +0000 (12:16 +0000)
committer Georgi Gerganov <redacted>
Sat, 27 Jan 2024 15:19:52 +0000 (17:19 +0200)
diff --git a/ggml-metal.m b/ggml-metal.m

index 60fef1a1912b599ae9f8ec0efc574746fdf1daaf..ab3c84f7fd9e9cdc9d36d0130cdcd1d26545fd95 100644 (file)
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -26,15 +26,6 @@
  
  #define GGML_METAL_MAX_KERNELS 256
  
-struct ggml_metal_buffer {
-    const char * name;
-
-    void   * data;
-    size_t   size;
-
-    id<MTLBuffer> metal;
-};
-
  struct ggml_metal_kernel {
      id<MTLFunction>             function;
      id<MTLComputePipelineState> pipeline;
@@ -172,9 +163,6 @@ struct ggml_metal_context {
  
      dispatch_queue_t d_queue;
  
-    int n_buffers;
-    struct ggml_metal_buffer buffers[GGML_METAL_MAX_BUFFERS];
-
      struct ggml_metal_kernel kernels[GGML_METAL_MAX_KERNELS];
  
      bool support_simdgroup_reduction;
@@ -242,24 +230,20 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
      // Show all the Metal device instances in the system
      NSArray * devices = MTLCopyAllDevices();
      for (id<MTLDevice> device in devices) {
-        NSString * s = [device name];
-        GGML_METAL_LOG_INFO("%s: found device: %s\n", __func__, [s UTF8String]);
+        GGML_METAL_LOG_INFO("%s: found device: %s\n", __func__, [[device name] UTF8String]);
      }
      [devices release]; // since it was created by a *Copy* C method
  #endif
  
      // Pick and show default Metal device
      id<MTLDevice> device = MTLCreateSystemDefaultDevice();
-    NSString * s = [device name];
-    GGML_METAL_LOG_INFO("%s: picking default device: %s\n", __func__, [s UTF8String]);
+    GGML_METAL_LOG_INFO("%s: picking default device: %s\n", __func__, [[device name] UTF8String]);
  
      // Configure context
      struct ggml_metal_context * ctx = malloc(sizeof(struct ggml_metal_context));
      ctx->device = device;
      ctx->n_cb   = MIN(n_cb, GGML_METAL_MAX_BUFFERS);
      ctx->queue  = [ctx->device newCommandQueue];
-    ctx->n_buffers = 0;
-
      ctx->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT);
  
      // load library
@@ -534,10 +518,6 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
  static void ggml_metal_free(struct ggml_metal_context * ctx) {
      GGML_METAL_LOG_INFO("%s: deallocating\n", __func__);
  
-    for (int i = 0; i < ctx->n_buffers; ++i) {
-        [ctx->buffers[i].metal release];
-    }
-
      for (int i = 0; i < GGML_METAL_MAX_KERNELS; ++i) {
          if (ctx->kernels[i].pipeline) {
              [ctx->kernels[i].pipeline release];
@@ -580,51 +560,30 @@ struct ggml_backend_metal_buffer_context {
  // the assumption is that there is 1-to-1 mapping between the host and device memory buffers, so we can find the
  // Metal buffer based on the host memory pointer
  //
-static id<MTLBuffer> ggml_metal_get_buffer(struct ggml_metal_context * ctx, struct ggml_tensor * t, size_t * offs) {
+static id<MTLBuffer> ggml_metal_get_buffer(struct ggml_tensor * t, size_t * offs) {
      //GGML_METAL_LOG_INFO("%s: data tensor '%16s', offs_data = %8ld, offs_eval = %8ld, offs_cach = %8ld\n", __func__, t->name, offs_data, offs_eval, offs_cach);
  
      const int64_t tsize = ggml_nbytes(t);
  
      ggml_backend_buffer_t buffer = t->view_src ? t->view_src->buffer : t->buffer;
  
-    // compatibility with ggml-backend
-    if (buffer && buffer->buft == ggml_backend_metal_buffer_type()) {
-        struct ggml_backend_metal_buffer_context * buf_ctx = (struct ggml_backend_metal_buffer_context *) buffer->context;
-
-        // find the view that contains the tensor fully
-        for (int i = 0; i < buf_ctx->n_buffers; ++i) {
-            const int64_t ioffs = (int64_t) t->data - (int64_t) buf_ctx->buffers[i].data;
-
-            //GGML_METAL_LOG_INFO("ioffs = %10ld, tsize = %10ld, sum = %10ld, buf_ctx->buffers[%d].size = %10ld\n", ioffs, tsize, ioffs + tsize, i, buf_ctx->buffers[i].size);
-            if (ioffs >= 0 && ioffs + tsize <= (int64_t) buf_ctx->buffers[i].size) {
-                *offs = (size_t) ioffs;
-
-                //GGML_METAL_LOG_INFO("%s: tensor '%16s', offs = %8ld\n", __func__, t->name, *offs);
-
-                return buf_ctx->buffers[i].metal;
-            }
-        }
-
-        GGML_METAL_LOG_ERROR("%s: error: tensor '%s' buffer is nil\n", __func__, t->name);
-
-        return nil;
-    }
+    struct ggml_backend_metal_buffer_context * buf_ctx = (struct ggml_backend_metal_buffer_context *) buffer->context;
  
      // find the view that contains the tensor fully
-    for (int i = 0; i < ctx->n_buffers; ++i) {
-        const int64_t ioffs = (int64_t) t->data - (int64_t) ctx->buffers[i].data;
+    for (int i = 0; i < buf_ctx->n_buffers; ++i) {
+        const int64_t ioffs = (int64_t) t->data - (int64_t) buf_ctx->buffers[i].data;
  
-        //GGML_METAL_LOG_INFO("ioffs = %10ld, tsize = %10ld, sum = %10ld, ctx->buffers[%d].size = %10ld, name = %s\n", ioffs, tsize, ioffs + tsize, i, ctx->buffers[i].size, ctx->buffers[i].name);
-        if (ioffs >= 0 && ioffs + tsize <= (int64_t) ctx->buffers[i].size) {
+        //GGML_METAL_LOG_INFO("ioffs = %10ld, tsize = %10ld, sum = %10ld, buf_ctx->buffers[%d].size = %10ld\n", ioffs, tsize, ioffs + tsize, i, buf_ctx->buffers[i].size);
+        if (ioffs >= 0 && ioffs + tsize <= (int64_t) buf_ctx->buffers[i].size) {
              *offs = (size_t) ioffs;
  
-            //GGML_METAL_LOG_INFO("%s: '%s' tensor '%16s', offs = %8ld\n", __func__, ctx->buffers[i].name, t->name, *offs);
+            //GGML_METAL_LOG_INFO("%s: tensor '%16s', offs = %8ld\n", __func__, t->name, *offs);
  
-            return ctx->buffers[i].metal;
+            return buf_ctx->buffers[i].metal;
          }
      }
  
-    GGML_METAL_LOG_ERROR("%s: error: buffer is nil\n", __func__);
+    GGML_METAL_LOG_ERROR("%s: error: tensor '%s' buffer is nil\n", __func__, t->name);
  
      return nil;
  }
@@ -817,9 +776,9 @@ static bool ggml_metal_graph_compute(
              const enum ggml_type src1t = src1 ? src1->type : GGML_TYPE_COUNT;
              const enum ggml_type dstt  = dst  ? dst->type  : GGML_TYPE_COUNT;
  
-            id<MTLBuffer> id_src0 = src0 ? ggml_metal_get_buffer(ctx, src0, &offs_src0) : nil;
-            id<MTLBuffer> id_src1 = src1 ? ggml_metal_get_buffer(ctx, src1, &offs_src1) : nil;
-            id<MTLBuffer> id_dst  = dst  ? ggml_metal_get_buffer(ctx, dst,  &offs_dst)  : nil;
+            id<MTLBuffer> id_src0 = src0 ? ggml_metal_get_buffer(src0, &offs_src0) : nil;
+            id<MTLBuffer> id_src1 = src1 ? ggml_metal_get_buffer(src1, &offs_src1) : nil;
+            id<MTLBuffer> id_dst  = dst  ? ggml_metal_get_buffer(dst,  &offs_dst)  : nil;
  
              //GGML_METAL_LOG_INFO("%s: op - %s\n", __func__, ggml_op_name(dst->op));
              //if (src0) {
@@ -1601,7 +1560,7 @@ static bool ggml_metal_graph_compute(
                                  struct ggml_tensor * src_cur = dst->src[2 + (j % n_as)];
  
                                  size_t offs_src_cur = 0;
-                                id<MTLBuffer> id_src_cur = ggml_metal_get_buffer(ctx, src_cur, &offs_src_cur);
+                                id<MTLBuffer> id_src_cur = ggml_metal_get_buffer(src_cur, &offs_src_cur);
  
                                  [encoder setBuffer:id_src_cur offset:offs_src_cur atIndex:19 + j];
                              }
@@ -1746,7 +1705,7 @@ static bool ggml_metal_graph_compute(
                                  struct ggml_tensor * src_cur = dst->src[2 + (j % n_as)];
  
                                  size_t offs_src_cur = 0;
-                                id<MTLBuffer> id_src_cur = ggml_metal_get_buffer(ctx, src_cur, &offs_src_cur);
+                                id<MTLBuffer> id_src_cur = ggml_metal_get_buffer(src_cur, &offs_src_cur);
  
                                  [encoder setBuffer:id_src_cur offset:offs_src_cur atIndex:23 + j];
                              }
author	Paul Tsochantaris <redacted>
	Fri, 26 Jan 2024 12:16:07 +0000 (12:16 +0000)
committer	Georgi Gerganov <redacted>
	Sat, 27 Jan 2024 15:19:52 +0000 (17:19 +0200)