metal : add option to disable debug logs (close #2764)

author Georgi Gerganov <redacted>

Tue, 29 Aug 2023 08:33:46 +0000 (11:33 +0300)

committer Georgi Gerganov <redacted>

Tue, 29 Aug 2023 08:33:46 +0000 (11:33 +0300)
author Georgi Gerganov <redacted>
Tue, 29 Aug 2023 08:33:46 +0000 (11:33 +0300)
committer Georgi Gerganov <redacted>
Tue, 29 Aug 2023 08:33:46 +0000 (11:33 +0300)
diff --git a/CMakeLists.txt b/CMakeLists.txt

index ba008bcc66da50bc892af1897f80442eefd90c8b..1eae2d670c0be101547cdbb953e73f04391ab885 100644 (file)
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -301,7 +301,7 @@ if (LLAMA_METAL)
      set(GGML_SOURCES_METAL ggml-metal.m ggml-metal.h)
  
      add_compile_definitions(GGML_USE_METAL)
-    add_compile_definitions(GGML_METAL_NDEBUG)
+    #add_compile_definitions(GGML_METAL_NDEBUG)
  
      # get full path to the file
      #add_compile_definitions(GGML_METAL_DIR_KERNELS="${CMAKE_CURRENT_SOURCE_DIR}/")
diff --git a/Makefile b/Makefile

index e60821dd580eb2f60163463b433283fbda18929b..a64374e7df3ac4620488fc4b36f2a0d58fc63be7 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -305,7 +305,7 @@ ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
  endif # LLAMA_HIPBLAS
  
  ifdef LLAMA_METAL
-       CFLAGS   += -DGGML_USE_METAL -DGGML_METAL_NDEBUG
+       CFLAGS   += -DGGML_USE_METAL #-DGGML_METAL_NDEBUG
         CXXFLAGS += -DGGML_USE_METAL
         LDFLAGS  += -framework Foundation -framework Metal -framework MetalKit
         OBJS     += ggml-metal.o
diff --git a/ggml-metal.m b/ggml-metal.m

index ad2ee8cf5fef098169d765b7fa92b12f57c9f70c..e929c4b07cadd054119bdec9cd6ad0c75f26b631 100644 (file)
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -11,6 +11,7 @@
  #define MIN(a, b) ((a) < (b) ? (a) : (b))
  #define MAX(a, b) ((a) > (b) ? (a) : (b))
  
+// TODO: temporary - reuse llama.cpp logging
  #ifdef GGML_METAL_NDEBUG
  #define metal_printf(...)
  #else
@@ -113,7 +114,7 @@ static NSString * const msl_library_source = @"see metal.metal";
  @end
  
  struct ggml_metal_context * ggml_metal_init(int n_cb) {
-    fprintf(stderr, "%s: allocating\n", __func__);
+    metal_printf("%s: allocating\n", __func__);
  
      struct ggml_metal_context * ctx = malloc(sizeof(struct ggml_metal_context));
  
@@ -132,7 +133,7 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
  
          ctx->library = [ctx->device newLibraryWithSource:msl_library_source options:nil error:&error];
          if (error) {
-            fprintf(stderr, "%s: error: %s\n", __func__, [[error description] UTF8String]);
+            metal_printf("%s: error: %s\n", __func__, [[error description] UTF8String]);
              return NULL;
          }
      }
@@ -146,11 +147,11 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
          //NSString * path = [[NSBundle mainBundle] pathForResource:@"../../examples/metal/metal" ofType:@"metal"];
          NSBundle * bundle = [NSBundle bundleForClass:[GGMLMetalClass class]];
          NSString * path = [bundle pathForResource:@"ggml-metal" ofType:@"metal"];
-        fprintf(stderr, "%s: loading '%s'\n", __func__, [path UTF8String]);
+        metal_printf("%s: loading '%s'\n", __func__, [path UTF8String]);
  
          NSString * src  = [NSString stringWithContentsOfFile:path encoding:NSUTF8StringEncoding error:&error];
          if (error) {
-            fprintf(stderr, "%s: error: %s\n", __func__, [[error description] UTF8String]);
+            metal_printf("%s: error: %s\n", __func__, [[error description] UTF8String]);
              return NULL;
          }
  
@@ -162,7 +163,7 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
          ctx->library = [ctx->device newLibraryWithSource:src options:nil error:&error];
  #endif
          if (error) {
-            fprintf(stderr, "%s: error: %s\n", __func__, [[error description] UTF8String]);
+            metal_printf("%s: error: %s\n", __func__, [[error description] UTF8String]);
              return NULL;
          }
      }
@@ -174,11 +175,11 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
  #define GGML_METAL_ADD_KERNEL(name) \
          ctx->function_##name = [ctx->library newFunctionWithName:@"kernel_"#name]; \
          ctx->pipeline_##name = [ctx->device newComputePipelineStateWithFunction:ctx->function_##name error:&error]; \
-        fprintf(stderr, "%s: loaded %-32s %16p | th_max = %4d | th_width = %4d\n", __func__, "kernel_"#name, (void *) ctx->pipeline_##name, \
+        metal_printf("%s: loaded %-32s %16p | th_max = %4d | th_width = %4d\n", __func__, "kernel_"#name, (void *) ctx->pipeline_##name, \
                  (int) ctx->pipeline_##name.maxTotalThreadsPerThreadgroup, \
                  (int) ctx->pipeline_##name.threadExecutionWidth); \
          if (error) { \
-            fprintf(stderr, "%s: load pipeline error: %s\n", __func__, [[error description] UTF8String]); \
+            metal_printf("%s: load pipeline error: %s\n", __func__, [[error description] UTF8String]); \
              return NULL; \
          }
  
@@ -230,19 +231,19 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
  #undef GGML_METAL_ADD_KERNEL
      }
  
-    fprintf(stderr, "%s: recommendedMaxWorkingSetSize  = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
-    fprintf(stderr, "%s: hasUnifiedMemory              = %s\n",       __func__, ctx->device.hasUnifiedMemory ? "true" : "false");
+    metal_printf("%s: recommendedMaxWorkingSetSize  = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
+    metal_printf("%s: hasUnifiedMemory              = %s\n",       __func__, ctx->device.hasUnifiedMemory ? "true" : "false");
      if (ctx->device.maxTransferRate != 0) {
-        fprintf(stderr, "%s: maxTransferRate               = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1024.0 / 1024.0);
+        metal_printf("%s: maxTransferRate               = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1024.0 / 1024.0);
      } else {
-        fprintf(stderr, "%s: maxTransferRate               = built-in GPU\n", __func__);
+        metal_printf("%s: maxTransferRate               = built-in GPU\n", __func__);
      }
  
      return ctx;
  }
  
  void ggml_metal_free(struct ggml_metal_context * ctx) {
-    fprintf(stderr, "%s: deallocating\n", __func__);
+    metal_printf("%s: deallocating\n", __func__);
  #define GGML_METAL_DEL_KERNEL(name) \
      [ctx->function_##name release]; \
      [ctx->pipeline_##name release];
@@ -311,7 +312,7 @@ void * ggml_metal_host_malloc(size_t n) {
      void * data = NULL;
      const int result = posix_memalign((void **) &data, getpagesize(), n);
      if (result != 0) {
-        fprintf(stderr, "%s: error: posix_memalign failed\n", __func__);
+        metal_printf("%s: error: posix_memalign failed\n", __func__);
          return NULL;
      }
  
@@ -339,7 +340,7 @@ int * ggml_metal_get_concur_list(struct ggml_metal_context * ctx) {
  // Metal buffer based on the host memory pointer
  //
  static id<MTLBuffer> ggml_metal_get_buffer(struct ggml_metal_context * ctx, struct ggml_tensor * t, size_t * offs) {
-    //fprintf(stderr, "%s: data tensor '%16s', offs_data = %8ld, offs_eval = %8ld, offs_cach = %8ld\n", __func__, t->name, offs_data, offs_eval, offs_cach);
+    //metal_printf("%s: data tensor '%16s', offs_data = %8ld, offs_eval = %8ld, offs_cach = %8ld\n", __func__, t->name, offs_data, offs_eval, offs_cach);
  
      const int64_t tsize = ggml_nbytes(t);
  
@@ -350,13 +351,13 @@ static id<MTLBuffer> ggml_metal_get_buffer(struct ggml_metal_context * ctx, stru
          if (ioffs >= 0 && ioffs + tsize <= (int64_t) ctx->buffers[i].size) {
              *offs = (size_t) ioffs;
  
-            //fprintf(stderr, "%s: '%s' tensor '%16s', offs = %8ld\n", __func__, ctx->buffers[i].name, t->name, *offs);
+            //metal_printf("%s: '%s' tensor '%16s', offs = %8ld\n", __func__, ctx->buffers[i].name, t->name, *offs);
  
              return ctx->buffers[i].metal;
          }
      }
  
-    fprintf(stderr, "%s: error: buffer is nil\n", __func__);
+    metal_printf("%s: error: buffer is nil\n", __func__);
  
      return nil;
  }
@@ -368,7 +369,7 @@ bool ggml_metal_add_buffer(
                           size_t   size,
                           size_t   max_size) {
      if (ctx->n_buffers >= GGML_METAL_MAX_BUFFERS) {
-        fprintf(stderr, "%s: too many buffers\n", __func__);
+        metal_printf("%s: too many buffers\n", __func__);
          return false;
      }
  
@@ -378,7 +379,7 @@ bool ggml_metal_add_buffer(
              const int64_t ioffs = (int64_t) data - (int64_t) ctx->buffers[i].data;
  
              if (ioffs >= 0 && ioffs < (int64_t) ctx->buffers[i].size) {
-                fprintf(stderr, "%s: error: buffer '%s' overlaps with '%s'\n", __func__, name, ctx->buffers[i].name);
+                metal_printf("%s: error: buffer '%s' overlaps with '%s'\n", __func__, name, ctx->buffers[i].name);
                  return false;
              }
          }
@@ -399,11 +400,11 @@ bool ggml_metal_add_buffer(
              ctx->buffers[ctx->n_buffers].metal = [ctx->device newBufferWithBytesNoCopy:data length:size_aligned options:MTLResourceStorageModeShared deallocator:nil];
  
              if (ctx->buffers[ctx->n_buffers].metal == nil) {
-                fprintf(stderr, "%s: failed to allocate '%-16s' buffer, size = %8.2f MB\n", __func__, name, size_aligned / 1024.0 / 1024.0);
+                metal_printf("%s: failed to allocate '%-16s' buffer, size = %8.2f MB\n", __func__, name, size_aligned / 1024.0 / 1024.0);
                  return false;
              }
  
-            fprintf(stderr, "%s: allocated '%-16s' buffer, size = %8.2f MB", __func__, name, size_aligned / 1024.0 / 1024.0);
+            metal_printf("%s: allocated '%-16s' buffer, size = %8.2f MB", __func__, name, size_aligned / 1024.0 / 1024.0);
  
              ++ctx->n_buffers;
          } else {
@@ -423,27 +424,27 @@ bool ggml_metal_add_buffer(
                  ctx->buffers[ctx->n_buffers].metal = [ctx->device newBufferWithBytesNoCopy:(void *) ((uint8_t *) data + i) length:size_step_aligned options:MTLResourceStorageModeShared deallocator:nil];
  
                  if (ctx->buffers[ctx->n_buffers].metal == nil) {
-                    fprintf(stderr, "%s: failed to allocate '%-16s' buffer, size = %8.2f MB\n", __func__, name, size_step_aligned / 1024.0 / 1024.0);
+                    metal_printf("%s: failed to allocate '%-16s' buffer, size = %8.2f MB\n", __func__, name, size_step_aligned / 1024.0 / 1024.0);
                      return false;
                  }
  
-                fprintf(stderr, "%s: allocated '%-16s' buffer, size = %8.2f MB, offs = %12ld", __func__, name, size_step_aligned / 1024.0 / 1024.0, i);
+                metal_printf("%s: allocated '%-16s' buffer, size = %8.2f MB, offs = %12ld", __func__, name, size_step_aligned / 1024.0 / 1024.0, i);
                  if (i + size_step < size) {
-                    fprintf(stderr, "\n");
+                    metal_printf("\n");
                  }
  
                  ++ctx->n_buffers;
              }
          }
  
-        fprintf(stderr, ", (%8.2f / %8.2f)",
+        metal_printf(", (%8.2f / %8.2f)",
                  ctx->device.currentAllocatedSize / 1024.0 / 1024.0,
                  ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
  
          if (ctx->device.currentAllocatedSize > ctx->device.recommendedMaxWorkingSetSize) {
-            fprintf(stderr, ", warning: current allocated size is greater than the recommended max working set size\n");
+            metal_printf(", warning: current allocated size is greater than the recommended max working set size\n");
          } else {
-            fprintf(stderr, "\n");
+            metal_printf("\n");
          }
      }
  
@@ -453,8 +454,6 @@ bool ggml_metal_add_buffer(
  void ggml_metal_set_tensor(
          struct ggml_metal_context * ctx,
          struct ggml_tensor * t) {
-    metal_printf("%s: set input for tensor '%s'\n", __func__, t->name);
-
      size_t offs;
      id<MTLBuffer> id_dst = ggml_metal_get_buffer(ctx, t, &offs);
  
@@ -464,8 +463,6 @@ void ggml_metal_set_tensor(
  void ggml_metal_get_tensor(
          struct ggml_metal_context * ctx,
          struct ggml_tensor * t) {
-    metal_printf("%s: extract results for tensor '%s'\n", __func__, t->name);
-
      size_t offs;
      id<MTLBuffer> id_src = ggml_metal_get_buffer(ctx, t, &offs);
  
@@ -560,15 +557,13 @@ void ggml_metal_graph_find_concurrency(
      }
  
      if (ctx->concur_list_len > GGML_MAX_CONCUR) {
-        fprintf(stderr, "%s: too many elements for metal ctx->concur_list!\n", __func__);
+        metal_printf("%s: too many elements for metal ctx->concur_list!\n", __func__);
      }
  }
  
  void ggml_metal_graph_compute(
          struct ggml_metal_context * ctx,
                 struct ggml_cgraph * gf) {
-    metal_printf("%s: evaluating graph\n", __func__);
-
      @autoreleasepool {
  
      // if there is ctx->concur_list, dispatch concurrently
@@ -616,7 +611,7 @@ void ggml_metal_graph_compute(
                      continue;
                  }
  
-                metal_printf("%s: encoding node %3d, op = %8s\n", __func__, i, ggml_op_name(gf->nodes[i]->op));
+                //metal_printf("%s: encoding node %3d, op = %8s\n", __func__, i, ggml_op_name(gf->nodes[i]->op));
  
                  struct ggml_tensor * src0 = gf->nodes[i]->src[0];
                  struct ggml_tensor * src1 = gf->nodes[i]->src[1];
@@ -764,7 +759,7 @@ void ggml_metal_graph_compute(
                                  } break;
                              default:
                                  {
-                                    fprintf(stderr, "%s: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op));
+                                    metal_printf("%s: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op));
                                      GGML_ASSERT(false);
                                  }
                          } break;
@@ -923,7 +918,7 @@ void ggml_metal_graph_compute(
                                          } break;
                                      default:
                                          {
-                                            fprintf(stderr, "Asserting on type %d\n",(int)src0t);
+                                            metal_printf("Asserting on type %d\n",(int)src0t);
                                              GGML_ASSERT(false && "not implemented");
                                          }
                                  };
@@ -1161,7 +1156,7 @@ void ggml_metal_graph_compute(
                          } break;
                      default:
                          {
-                            fprintf(stderr, "%s: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op));
+                            metal_printf("%s: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op));
                              GGML_ASSERT(false);
                          }
                  }
@@ -1186,7 +1181,7 @@ void ggml_metal_graph_compute(
  
          MTLCommandBufferStatus status = (MTLCommandBufferStatus) [ctx->command_buffers[i] status];
          if (status != MTLCommandBufferStatusCompleted) {
-            fprintf(stderr, "%s: command buffer %d failed with status %lu\n", __func__, i, status);
+            metal_printf("%s: command buffer %d failed with status %lu\n", __func__, i, status);
              GGML_ASSERT(false);
          }
      }
author	Georgi Gerganov <redacted>
	Tue, 29 Aug 2023 08:33:46 +0000 (11:33 +0300)
committer	Georgi Gerganov <redacted>
	Tue, 29 Aug 2023 08:33:46 +0000 (11:33 +0300)
CMakeLists.txt		patch | blob | history
Makefile		patch | blob | history
ggml-metal.m		patch | blob | history