metal : reusing llama.cpp logging (#3152)

author Rickard Hallerbäck <redacted>

Wed, 27 Sep 2023 15:48:33 +0000 (17:48 +0200)

committer GitHub <redacted>

Wed, 27 Sep 2023 15:48:33 +0000 (18:48 +0300)
author Rickard Hallerbäck <redacted>
Wed, 27 Sep 2023 15:48:33 +0000 (17:48 +0200)
committer GitHub <redacted>
Wed, 27 Sep 2023 15:48:33 +0000 (18:48 +0300)
diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp

index 34ddfde39d29575fa581575fafbf2a2bfd90c6f6..2f1a1d9ff5645e9c36414d24723c68aa3e84bf99 100644 (file)
--- a/examples/llama-bench/llama-bench.cpp
+++ b/examples/llama-bench/llama-bench.cpp
@@ -903,7 +903,7 @@ static void test_gen(llama_context * ctx, int n_gen, int n_past, int n_threads)
      }
  }
  
-static void llama_null_log_callback(enum llama_log_level level, const char * text, void * user_data) {
+static void llama_null_log_callback(enum ggml_log_level level, const char * text, void * user_data) {
      (void) level;
      (void) text;
      (void) user_data;
diff --git a/ggml-metal.h b/ggml-metal.h

index fca28d37ef97069ca7800fc890ea4d013c46437f..790cf0bf7b963d9c1f52a45dbac5be71e0662256 100644 (file)
--- a/ggml-metal.h
+++ b/ggml-metal.h
@@ -19,6 +19,8 @@
  
  #pragma once
  
+#include "ggml.h"
+
  #include <stddef.h>
  #include <stdbool.h>
  
@@ -33,6 +35,8 @@ struct ggml_cgraph;
  extern "C" {
  #endif
  
+void ggml_metal_log_set_callback(ggml_log_callback log_callback, void * user_data);
+
  struct ggml_metal_context;
  
  // number of command buffers to use
diff --git a/ggml-metal.m b/ggml-metal.m

index 1139ee3114610e05726b7e4319843eade838271c..654eb67f3392b3c3d2fd1a67496297e2feec2d9d 100644 (file)
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -11,11 +11,14 @@
  #define MIN(a, b) ((a) < (b) ? (a) : (b))
  #define MAX(a, b) ((a) > (b) ? (a) : (b))
  
-// TODO: temporary - reuse llama.cpp logging
  #ifdef GGML_METAL_NDEBUG
-#define metal_printf(...)
+#define GGML_METAL_LOG_INFO(...)
+#define GGML_METAL_LOG_WARN(...)
+#define GGML_METAL_LOG_ERROR(...)
  #else
-#define metal_printf(...) fprintf(stderr, __VA_ARGS__)
+#define GGML_METAL_LOG_INFO(...)  ggml_metal_log(GGML_LOG_LEVEL_INFO, __VA_ARGS__)
+#define GGML_METAL_LOG_WARN(...)  ggml_metal_log(GGML_LOG_LEVEL_WARN, __VA_ARGS__)
+#define GGML_METAL_LOG_ERROR(...) ggml_metal_log(GGML_LOG_LEVEL_ERROR, __VA_ARGS__)
  #endif
  
  #define UNUSED(x) (void)(x)
@@ -120,8 +123,37 @@ static NSString * const msl_library_source = @"see metal.metal";
  @implementation GGMLMetalClass
  @end
  
+ggml_log_callback ggml_metal_log_callback = NULL;
+void * ggml_metal_log_user_data = NULL;
+
+void ggml_metal_log_set_callback(ggml_log_callback log_callback, void * user_data) {
+    ggml_metal_log_callback  = log_callback;
+    ggml_metal_log_user_data = user_data;
+}
+
+static void ggml_metal_log(enum ggml_log_level level, const char* format, ...){
+    if (ggml_metal_log_callback != NULL) {
+        va_list args;
+        va_start(args, format);
+        char buffer[128];
+        int len = vsnprintf(buffer, 128, format, args);
+        if (len < 128) {
+            ggml_metal_log_callback(level, buffer, ggml_metal_log_user_data);
+        } else {
+            char* buffer2 = malloc(len+1);
+            vsnprintf(buffer2, len+1, format, args);
+            buffer2[len] = 0;
+            ggml_metal_log_callback(level, buffer2, ggml_metal_log_user_data);
+            free(buffer2);
+        }
+        va_end(args);
+    }
+}
+
+
+
  struct ggml_metal_context * ggml_metal_init(int n_cb) {
-    metal_printf("%s: allocating\n", __func__);
+    GGML_METAL_LOG_INFO("%s: allocating\n", __func__);
  
      id <MTLDevice> device;
      NSString * s;
@@ -131,14 +163,14 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
      NSArray * devices = MTLCopyAllDevices();
      for (device in devices) {
          s = [device name];
-        metal_printf("%s: found device: %s\n", __func__, [s UTF8String]);
+        GGML_METAL_LOG_INFO("%s: found device: %s\n", __func__, [s UTF8String]);
      }
  #endif
  
      // Pick and show default Metal device
      device = MTLCreateSystemDefaultDevice();
      s = [device name];
-    metal_printf("%s: picking default device: %s\n", __func__, [s UTF8String]);
+    GGML_METAL_LOG_INFO("%s: picking default device: %s\n", __func__, [s UTF8String]);
  
      // Configure context
      struct ggml_metal_context * ctx = malloc(sizeof(struct ggml_metal_context));
@@ -165,7 +197,7 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
          ctx->library = [ctx->device newLibraryWithURL:libURL error:&error];
  
          if (error) {
-            metal_printf("%s: error: %s\n", __func__, [[error description] UTF8String]);
+            GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
              return NULL;
          }
      }
@@ -179,11 +211,11 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
          //NSString * path = [[NSBundle mainBundle] pathForResource:@"../../examples/metal/metal" ofType:@"metal"];
          NSBundle * bundle = [NSBundle bundleForClass:[GGMLMetalClass class]];
          NSString * path   = [bundle pathForResource:@"ggml-metal" ofType:@"metal"];
-        metal_printf("%s: loading '%s'\n", __func__, [path UTF8String]);
+        GGML_METAL_LOG_INFO("%s: loading '%s'\n", __func__, [path UTF8String]);
  
          NSString * src  = [NSString stringWithContentsOfFile:path encoding:NSUTF8StringEncoding error:&error];
          if (error) {
-            metal_printf("%s: error: %s\n", __func__, [[error description] UTF8String]);
+            GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
              return NULL;
          }
  
@@ -195,7 +227,7 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
          ctx->library = [ctx->device newLibraryWithSource:src options:nil error:&error];
  #endif
          if (error) {
-            metal_printf("%s: error: %s\n", __func__, [[error description] UTF8String]);
+            GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
              return NULL;
          }
      }
@@ -207,11 +239,11 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
  #define GGML_METAL_ADD_KERNEL(name) \
          ctx->function_##name = [ctx->library newFunctionWithName:@"kernel_"#name]; \
          ctx->pipeline_##name = [ctx->device newComputePipelineStateWithFunction:ctx->function_##name error:&error]; \
-        metal_printf("%s: loaded %-32s %16p | th_max = %4d | th_width = %4d\n", __func__, "kernel_"#name, (void *) ctx->pipeline_##name, \
+        GGML_METAL_LOG_INFO("%s: loaded %-32s %16p | th_max = %4d | th_width = %4d\n", __func__, "kernel_"#name, (void *) ctx->pipeline_##name, \
                  (int) ctx->pipeline_##name.maxTotalThreadsPerThreadgroup, \
                  (int) ctx->pipeline_##name.threadExecutionWidth); \
          if (error) { \
-            metal_printf("%s: load pipeline error: %s\n", __func__, [[error description] UTF8String]); \
+          GGML_METAL_LOG_ERROR("%s: error: load pipeline error: %s\n", __func__, [[error description] UTF8String]); \
              return NULL; \
          }
  
@@ -270,13 +302,13 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
  #undef GGML_METAL_ADD_KERNEL
      }
  
-    metal_printf("%s: hasUnifiedMemory              = %s\n",       __func__, ctx->device.hasUnifiedMemory ? "true" : "false");
+    GGML_METAL_LOG_INFO("%s: hasUnifiedMemory              = %s\n",       __func__, ctx->device.hasUnifiedMemory ? "true" : "false");
  #if TARGET_OS_OSX
-    metal_printf("%s: recommendedMaxWorkingSetSize  = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
+    GGML_METAL_LOG_INFO("%s: recommendedMaxWorkingSetSize  = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
      if (ctx->device.maxTransferRate != 0) {
-        metal_printf("%s: maxTransferRate               = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1024.0 / 1024.0);
+        GGML_METAL_LOG_INFO("%s: maxTransferRate               = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1024.0 / 1024.0);
      } else {
-        metal_printf("%s: maxTransferRate               = built-in GPU\n", __func__);
+        GGML_METAL_LOG_INFO("%s: maxTransferRate               = built-in GPU\n", __func__);
      }
  #endif
  
@@ -284,7 +316,7 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
  }
  
  void ggml_metal_free(struct ggml_metal_context * ctx) {
-    metal_printf("%s: deallocating\n", __func__);
+    GGML_METAL_LOG_INFO("%s: deallocating\n", __func__);
  #define GGML_METAL_DEL_KERNEL(name) \
      [ctx->function_##name release]; \
      [ctx->pipeline_##name release];
@@ -360,7 +392,7 @@ void * ggml_metal_host_malloc(size_t n) {
      void * data = NULL;
      const int result = posix_memalign((void **) &data, sysconf(_SC_PAGESIZE), n);
      if (result != 0) {
-        metal_printf("%s: error: posix_memalign failed\n", __func__);
+        GGML_METAL_LOG_ERROR("%s: error: posix_memalign failed\n", __func__);
          return NULL;
      }
  
@@ -388,7 +420,7 @@ int * ggml_metal_get_concur_list(struct ggml_metal_context * ctx) {
  // Metal buffer based on the host memory pointer
  //
  static id<MTLBuffer> ggml_metal_get_buffer(struct ggml_metal_context * ctx, struct ggml_tensor * t, size_t * offs) {
-    //metal_printf("%s: data tensor '%16s', offs_data = %8ld, offs_eval = %8ld, offs_cach = %8ld\n", __func__, t->name, offs_data, offs_eval, offs_cach);
+    //GGML_METAL_LOG_INFO("%s: data tensor '%16s', offs_data = %8ld, offs_eval = %8ld, offs_cach = %8ld\n", __func__, t->name, offs_data, offs_eval, offs_cach);
  
      const int64_t tsize = ggml_nbytes(t);
  
@@ -400,13 +432,13 @@ static id<MTLBuffer> ggml_metal_get_buffer(struct ggml_metal_context * ctx, stru
          if (ioffs >= 0 && ioffs + tsize <= (int64_t) ctx->buffers[i].size) {
              *offs = (size_t) ioffs;
  
-            //metal_printf("%s: '%s' tensor '%16s', offs = %8ld\n", __func__, ctx->buffers[i].name, t->name, *offs);
+            //GGML_METAL_LOG_INFO("%s: '%s' tensor '%16s', offs = %8ld\n", __func__, ctx->buffers[i].name, t->name, *offs);
  
              return ctx->buffers[i].metal;
          }
      }
  
-    metal_printf("%s: error: buffer is nil\n", __func__);
+    GGML_METAL_LOG_ERROR("%s: error: buffer is nil\n", __func__);
  
      return nil;
  }
@@ -418,7 +450,7 @@ bool ggml_metal_add_buffer(
                           size_t   size,
                           size_t   max_size) {
      if (ctx->n_buffers >= GGML_METAL_MAX_BUFFERS) {
-        metal_printf("%s: too many buffers\n", __func__);
+        GGML_METAL_LOG_ERROR("%s: error: too many buffers\n", __func__);
          return false;
      }
  
@@ -428,7 +460,7 @@ bool ggml_metal_add_buffer(
              const int64_t ioffs = (int64_t) data - (int64_t) ctx->buffers[i].data;
  
              if (ioffs >= 0 && ioffs < (int64_t) ctx->buffers[i].size) {
-                metal_printf("%s: error: buffer '%s' overlaps with '%s'\n", __func__, name, ctx->buffers[i].name);
+                GGML_METAL_LOG_ERROR("%s: error: buffer '%s' overlaps with '%s'\n", __func__, name, ctx->buffers[i].name);
                  return false;
              }
          }
@@ -449,11 +481,11 @@ bool ggml_metal_add_buffer(
              ctx->buffers[ctx->n_buffers].metal = [ctx->device newBufferWithBytesNoCopy:data length:size_aligned options:MTLResourceStorageModeShared deallocator:nil];
  
              if (ctx->buffers[ctx->n_buffers].metal == nil) {
-                metal_printf("%s: failed to allocate '%-16s' buffer, size = %8.2f MB\n", __func__, name, size_aligned / 1024.0 / 1024.0);
+                GGML_METAL_LOG_ERROR("%s: error: failed to allocate '%-16s' buffer, size = %8.2f MB\n", __func__, name, size_aligned / 1024.0 / 1024.0);
                  return false;
              }
  
-            metal_printf("%s: allocated '%-16s' buffer, size = %8.2f MB", __func__, name, size_aligned / 1024.0 / 1024.0);
+            GGML_METAL_LOG_INFO("%s: allocated '%-16s' buffer, size = %8.2f MB", __func__, name, size_aligned / 1024.0 / 1024.0);
  
              ++ctx->n_buffers;
          } else {
@@ -473,13 +505,13 @@ bool ggml_metal_add_buffer(
                  ctx->buffers[ctx->n_buffers].metal = [ctx->device newBufferWithBytesNoCopy:(void *) ((uint8_t *) data + i) length:size_step_aligned options:MTLResourceStorageModeShared deallocator:nil];
  
                  if (ctx->buffers[ctx->n_buffers].metal == nil) {
-                    metal_printf("%s: failed to allocate '%-16s' buffer, size = %8.2f MB\n", __func__, name, size_step_aligned / 1024.0 / 1024.0);
+                    GGML_METAL_LOG_ERROR("%s: error: failed to allocate '%-16s' buffer, size = %8.2f MB\n", __func__, name, size_step_aligned / 1024.0 / 1024.0);
                      return false;
                  }
  
-                metal_printf("%s: allocated '%-16s' buffer, size = %8.2f MB, offs = %12ld", __func__, name, size_step_aligned / 1024.0 / 1024.0, i);
+                GGML_METAL_LOG_INFO("%s: allocated '%-16s' buffer, size = %8.2f MB, offs = %12ld", __func__, name, size_step_aligned / 1024.0 / 1024.0, i);
                  if (i + size_step < size) {
-                    metal_printf("\n");
+                    GGML_METAL_LOG_INFO("\n");
                  }
  
                  ++ctx->n_buffers;
@@ -487,17 +519,17 @@ bool ggml_metal_add_buffer(
          }
  
  #if TARGET_OS_OSX
-        metal_printf(", (%8.2f / %8.2f)",
+        GGML_METAL_LOG_INFO(", (%8.2f / %8.2f)",
                  ctx->device.currentAllocatedSize / 1024.0 / 1024.0,
                  ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
  
          if (ctx->device.currentAllocatedSize > ctx->device.recommendedMaxWorkingSetSize) {
-            metal_printf(", warning: current allocated size is greater than the recommended max working set size\n");
+            GGML_METAL_LOG_WARN(", warning: current allocated size is greater than the recommended max working set size\n", __func__);
          } else {
-            metal_printf("\n");
+            GGML_METAL_LOG_INFO("\n");
          }
  #else
-        metal_printf(", (%8.2f)\n", ctx->device.currentAllocatedSize / 1024.0 / 1024.0);
+        GGML_METAL_LOG_INFO(", (%8.2f)\n", ctx->device.currentAllocatedSize / 1024.0 / 1024.0);
  #endif
      }
  
@@ -610,7 +642,7 @@ void ggml_metal_graph_find_concurrency(
      }
  
      if (ctx->concur_list_len > GGML_MAX_CONCUR) {
-        metal_printf("%s: too many elements for metal ctx->concur_list!\n", __func__);
+        GGML_METAL_LOG_WARN("%s: too many elements for metal ctx->concur_list!\n", __func__);
      }
  }
  
@@ -664,7 +696,7 @@ void ggml_metal_graph_compute(
                      continue;
                  }
  
-                //metal_printf("%s: encoding node %3d, op = %8s\n", __func__, i, ggml_op_name(gf->nodes[i]->op));
+                //GGML_METAL_LOG_INFO("%s: encoding node %3d, op = %8s\n", __func__, i, ggml_op_name(gf->nodes[i]->op));
  
                  struct ggml_tensor * src0 = gf->nodes[i]->src[0];
                  struct ggml_tensor * src1 = gf->nodes[i]->src[1];
@@ -708,17 +740,17 @@ void ggml_metal_graph_compute(
                  id<MTLBuffer> id_src1 = src1 ? ggml_metal_get_buffer(ctx, src1, &offs_src1) : nil;
                  id<MTLBuffer> id_dst  = dst  ? ggml_metal_get_buffer(ctx, dst,  &offs_dst)  : nil;
  
-                //metal_printf("%s: op - %s\n", __func__, ggml_op_name(dst->op));
+                //GGML_METAL_LOG_INFO("%s: op - %s\n", __func__, ggml_op_name(dst->op));
                  //if (src0) {
-                //    metal_printf("%s: src0 - %4s [%5lld, %5lld, %5lld], %d, %s\n", __func__, ggml_type_name(src0t), ne00, ne01, ne02,
+                //    GGML_METAL_LOG_INFO("%s: src0 - %4s [%5lld, %5lld, %5lld], %d, %s\n", __func__, ggml_type_name(src0t), ne00, ne01, ne02,
                  //            ggml_is_contiguous(src0), src0->name);
                  //}
                  //if (src1) {
-                //    metal_printf("%s: src1 - %4s [%5lld, %5lld, %5lld], %d, %s\n", __func__, ggml_type_name(src1t), ne10, ne11, ne12,
+                //    GGML_METAL_LOG_INFO("%s: src1 - %4s [%5lld, %5lld, %5lld], %d, %s\n", __func__, ggml_type_name(src1t), ne10, ne11, ne12,
                  //            ggml_is_contiguous(src1), src1->name);
                  //}
                  //if (dst) {
-                //    metal_printf("%s: dst  - %4s [%5lld, %5lld, %5lld], 1, %s\n",  __func__, ggml_type_name(dstt),  ne0,  ne1,  ne2,
+                //    GGML_METAL_LOG_INFO("%s: dst  - %4s [%5lld, %5lld, %5lld], 1, %s\n",  __func__, ggml_type_name(dstt),  ne0,  ne1,  ne2,
                  //            dst->name);
                  //}
  
@@ -830,7 +862,7 @@ void ggml_metal_graph_compute(
                                  } break;
                              default:
                                  {
-                                    metal_printf("%s: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op));
+                                    GGML_METAL_LOG_WARN("%s: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op));
                                      GGML_ASSERT(false);
                                  }
                          } break;
@@ -1019,7 +1051,7 @@ void ggml_metal_graph_compute(
                                          } break;
                                      default:
                                          {
-                                            metal_printf("Asserting on type %d\n",(int)src0t);
+                                            GGML_METAL_LOG_ERROR("Asserting on type %d\n", (int)src0t);
                                              GGML_ASSERT(false && "not implemented");
                                          }
                                  };
@@ -1261,7 +1293,7 @@ void ggml_metal_graph_compute(
                          } break;
                      default:
                          {
-                            metal_printf("%s: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op));
+                            GGML_METAL_LOG_ERROR("%s: error: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op));
                              GGML_ASSERT(false);
                          }
                  }
@@ -1286,7 +1318,7 @@ void ggml_metal_graph_compute(
  
          MTLCommandBufferStatus status = (MTLCommandBufferStatus) [ctx->command_buffers[i] status];
          if (status != MTLCommandBufferStatusCompleted) {
-            metal_printf("%s: command buffer %d failed with status %lu\n", __func__, i, status);
+            GGML_METAL_LOG_INFO("%s: command buffer %d failed with status %lu\n", __func__, i, status);
              GGML_ASSERT(false);
          }
      }
diff --git a/ggml.h b/ggml.h

index f45456876da621d5beafdc2419d9d575a9fb02ff..b2251acef271edc12fc9292e74a49df6551e0e5f 100644 (file)
--- a/ggml.h
+++ b/ggml.h
@@ -445,6 +445,12 @@ extern "C" {
          GGML_OBJECT_WORK_BUFFER
      };
  
+    enum ggml_log_level {
+        GGML_LOG_LEVEL_ERROR = 2,
+        GGML_LOG_LEVEL_WARN = 3,
+        GGML_LOG_LEVEL_INFO = 4
+    };
+
      // ggml object
      struct ggml_object {
          size_t offs;
@@ -1691,6 +1697,7 @@ extern "C" {
      };
  
      typedef void (*ggml_opt_callback)(void * data, float * sched);
+    typedef void (*ggml_log_callback)(enum ggml_log_level level, const char * text, void * user_data);
  
      // optimization parameters
      //
diff --git a/llama.cpp b/llama.cpp

index 346636501ce15c8a9b73e28e9b2d4bbf1d273440..1327fde6f5f2cbd758768623884e51eccdfb83c6 100644 (file)
--- a/llama.cpp
+++ b/llama.cpp
@@ -92,12 +92,12 @@
  //
  
  LLAMA_ATTRIBUTE_FORMAT(2, 3)
-static void llama_log_internal        (llama_log_level level, const char* format, ...);
-static void llama_log_callback_default(llama_log_level level, const char * text, void * user_data);
+static void llama_log_internal        (ggml_log_level level, const char* format, ...);
+static void llama_log_callback_default(ggml_log_level level, const char * text, void * user_data);
  
-#define LLAMA_LOG_INFO(...)  llama_log_internal(LLAMA_LOG_LEVEL_INFO , __VA_ARGS__)
-#define LLAMA_LOG_WARN(...)  llama_log_internal(LLAMA_LOG_LEVEL_WARN , __VA_ARGS__)
-#define LLAMA_LOG_ERROR(...) llama_log_internal(LLAMA_LOG_LEVEL_ERROR, __VA_ARGS__)
+#define LLAMA_LOG_INFO(...)  llama_log_internal(GGML_LOG_LEVEL_INFO , __VA_ARGS__)
+#define LLAMA_LOG_WARN(...)  llama_log_internal(GGML_LOG_LEVEL_WARN , __VA_ARGS__)
+#define LLAMA_LOG_ERROR(...) llama_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__)
  
  //
  // helpers
@@ -904,7 +904,7 @@ static std::string llama_token_to_str(const struct llama_context * ctx, llama_to
  
  struct llama_state {
      // We save the log callback globally
-    llama_log_callback log_callback = llama_log_callback_default;
+    ggml_log_callback log_callback = llama_log_callback_default;
      void * log_callback_user_data = nullptr;
  };
  
@@ -6366,6 +6366,7 @@ struct llama_context * llama_new_context_with_model(
                      llama_free(ctx);
                      return NULL;
                  }
+                ggml_metal_log_set_callback(llama_log_callback_default, NULL);
                  ggml_metal_graph_find_concurrency(ctx->ctx_metal, gf, false);
                  ggml_allocr_set_parse_seq(ctx->alloc, ggml_metal_get_concur_list(ctx->ctx_metal), ggml_metal_if_optimized(ctx->ctx_metal));
              }
@@ -7199,12 +7200,12 @@ const std::vector<std::pair<std::string, struct ggml_tensor *>> & llama_internal
      return ctx->model.tensors_by_name;
  }
  
-void llama_log_set(llama_log_callback log_callback, void * user_data) {
+void llama_log_set(ggml_log_callback log_callback, void * user_data) {
      g_state.log_callback = log_callback ? log_callback : llama_log_callback_default;
      g_state.log_callback_user_data = user_data;
  }
  
-static void llama_log_internal_v(llama_log_level level, const char * format, va_list args) {
+static void llama_log_internal_v(ggml_log_level level, const char * format, va_list args) {
      va_list args_copy;
      va_copy(args_copy, args);
      char buffer[128];
@@ -7221,14 +7222,14 @@ static void llama_log_internal_v(llama_log_level level, const char * format, va_
      va_end(args_copy);
  }
  
-static void llama_log_internal(llama_log_level level, const char * format, ...) {
+static void llama_log_internal(ggml_log_level level, const char * format, ...) {
      va_list args;
      va_start(args, format);
      llama_log_internal_v(level, format, args);
      va_end(args);
  }
  
-static void llama_log_callback_default(llama_log_level level, const char * text, void * user_data) {
+static void llama_log_callback_default(ggml_log_level level, const char * text, void * user_data) {
      (void) level;
      (void) user_data;
      fputs(text, stderr);
diff --git a/llama.h b/llama.h

index 369be048c001276bcf8d3cea6ca0c5dcebd79a2c..350268b9a94aa361ef734af3e225f200f23fc4f1 100644 (file)
--- a/llama.h
+++ b/llama.h
@@ -62,12 +62,6 @@ extern "C" {
  
      typedef int llama_token;
  
-    enum llama_log_level {
-        LLAMA_LOG_LEVEL_ERROR = 2,
-        LLAMA_LOG_LEVEL_WARN  = 3,
-        LLAMA_LOG_LEVEL_INFO  = 4
-    };
-
      enum llama_vocab_type {
          LLAMA_VOCAB_TYPE_SPM = 0, // SentencePiece
          LLAMA_VOCAB_TYPE_BPE = 1, // Byte Pair Encoding
@@ -151,13 +145,6 @@ extern "C" {
          bool embedding;  // embedding mode only
      };
  
-    // Signature for logging events
-    // Note that text includes the new line character at the end for most events.
-    // If your logging mechanism cannot handle that, check if the last character is '\n' and strip it
-    // if it exists.
-    // It might not exist for progress report where '.' is output repeatedly.
-    typedef void (*llama_log_callback)(enum llama_log_level level, const char * text, void * user_data);
-
      // model quantization parameters
      typedef struct llama_model_quantize_params {
          int nthread;                 // number of threads to use for quantizing, if <=0 will use std::thread::hardware_concurrency()
@@ -526,7 +513,7 @@ extern "C" {
  
      // Set callback for all future logging events.
      // If this is not called, or NULL is supplied, everything is output on stderr.
-    LLAMA_API void llama_log_set(llama_log_callback log_callback, void * user_data);
+    LLAMA_API void llama_log_set(ggml_log_callback log_callback, void * user_data);
  
      LLAMA_API void llama_dump_timing_info_yaml(FILE * stream, const struct llama_context * ctx);
author	Rickard Hallerbäck <redacted>
	Wed, 27 Sep 2023 15:48:33 +0000 (17:48 +0200)
committer	GitHub <redacted>
	Wed, 27 Sep 2023 15:48:33 +0000 (18:48 +0300)
examples/llama-bench/llama-bench.cpp		patch \| blob \| history
ggml-metal.h		patch \| blob \| history
ggml-metal.m		patch \| blob \| history
ggml.h		patch \| blob \| history
llama.cpp		patch \| blob \| history
llama.h		patch \| blob \| history