GGML_API size_t ggml_backend_reg_dev_count(ggml_backend_reg_t reg);
GGML_API ggml_backend_dev_t ggml_backend_reg_dev_get(ggml_backend_reg_t reg, size_t index);
GGML_API void * ggml_backend_reg_get_proc_address(ggml_backend_reg_t reg, const char * name);
- GGML_API void ggml_backend_reg_set_log_callback(ggml_backend_reg_t reg, ggml_log_callback log_callback, void * user_data);
+
// Functions that may be obtained using ggml_backend_reg_get_proc_address
typedef ggml_backend_buffer_type_t (*ggml_backend_split_buffer_type_t)(const float *);
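// Usage sketch (illustrative): look up a backend-specific function at runtime.
// The lookup string and the `tensor_split` array are assumptions for the sake
// of the example; each backend defines which names it actually exports.
//
//     ggml_backend_reg_t reg = ggml_backend_cuda_reg();
//     ggml_backend_split_buffer_type_t fn = (ggml_backend_split_buffer_type_t)
//         ggml_backend_reg_get_proc_address(reg, "ggml_backend_split_buffer_type");
//     if (fn != NULL) {
//         ggml_backend_buffer_type_t buft = fn(tensor_split); // per-device proportions
//     }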
GGML_API ggml_backend_dev_t ggml_backend_dev_by_name(const char * name);
GGML_API ggml_backend_dev_t ggml_backend_dev_by_type(enum ggml_backend_dev_type type);
- // Set the log callback for all registered backends
- GGML_API void ggml_backend_set_log_callback(ggml_log_callback log_callback, void * user_data);
-
// Direct backend (stream) initialization
// = ggml_backend_dev_init(ggml_backend_dev_by_name(name), params)
GGML_API ggml_backend_t ggml_backend_init_by_name(const char * name, const char * params);
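// Usage sketch (illustrative): the two calls below are equivalent, per the
// comment above; the device name "CUDA0" is an example and depends on which
// backends are registered.
//
//     ggml_backend_t backend = ggml_backend_init_by_name("CUDA0", NULL);
//     // ... is the same as:
//     ggml_backend_t backend2 = ggml_backend_dev_init(ggml_backend_dev_by_name("CUDA0"), NULL);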
size_t* free,
size_t* total);
-/**
- * @brief Set the logging callback for GGML.
- *
- * This function sets the logging callback and user data for logging.
- *
- * @param log_callback The logging callback to set.
- * @param user_data User data to pass to the logging callback.
- */
-GGML_API void ggml_backend_cann_log_set_callback(ggml_log_callback log_callback,
- void* user_data);
-
#ifdef __cplusplus
}
#endif
GGML_API bool ggml_backend_cuda_register_host_buffer(void * buffer, size_t size);
GGML_API void ggml_backend_cuda_unregister_host_buffer(void * buffer);
-GGML_API void ggml_backend_cuda_log_set_callback(ggml_log_callback log_callback, void * user_data);
-
GGML_API ggml_backend_reg_t ggml_backend_cuda_reg(void);
#ifdef __cplusplus
// user-code should use only these functions
//
-GGML_API void ggml_backend_metal_log_set_callback(ggml_log_callback log_callback, void * user_data);
-
GGML_API ggml_backend_t ggml_backend_metal_init(void);
GGML_API bool ggml_backend_is_metal(ggml_backend_t backend);
typedef void (*ggml_opt_callback)(void * data, int accum_step, float * sched, bool * cancel);
typedef void (*ggml_log_callback)(enum ggml_log_level level, const char * text, void * user_data);
+ // Set callback for all future logging events.
+ // If this is not called, or NULL is supplied, everything is output on stderr.
+ GGML_API void ggml_log_set(ggml_log_callback log_callback, void * user_data);
+
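// Usage sketch (illustrative): install one callback for all logging; the
// callback and its user_data are hypothetical.
//
//     static void my_log_cb(enum ggml_log_level level, const char * text, void * ud) {
//         fprintf((FILE *) ud, "%s", text); // `text` arrives pre-formatted
//     }
//
//     ggml_log_set(my_log_cb, stderr); // route everything to my_log_cb
//     ggml_log_set(NULL, NULL);        // restore the default (stderr)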
// optimization parameters
//
// see ggml.c (ggml_opt_default_params) for default values
// (optional) get a pointer to a function in the backend
// backends can add custom functions that are not part of the standard ggml-backend interface
void * (*get_proc_address)(ggml_backend_reg_t reg, const char * name);
-
- // (optional) set the log callback for the backend
- void (*set_log_callback)(ggml_backend_reg_t reg, ggml_log_callback log_callback, void * user_data);
};
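// Implementation sketch (illustrative) for the get_proc_address member above;
// the function names are hypothetical. Returning NULL for unknown names keeps
// the extension mechanism optional.
//
//     static void * my_backend_get_proc_address(ggml_backend_reg_t reg, const char * name) {
//         if (strcmp(name, "my_backend_custom_fn") == 0) {
//             return (void *) my_backend_custom_fn;
//         }
//         return NULL;
//         GGML_UNUSED(reg);
//     }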
struct ggml_backend_reg {
return reg->iface.get_proc_address(reg, name);
}
-void ggml_backend_reg_set_log_callback(ggml_backend_reg_t reg, ggml_log_callback log_callback, void * user_data) {
- if (reg->iface.set_log_callback) {
- reg->iface.set_log_callback(reg, log_callback, user_data);
- }
-}
-
// Backend registry
#ifdef GGML_USE_CUDA
return NULL;
}
-void ggml_backend_set_log_callback(ggml_log_callback log_callback, void * user_data) {
- for (size_t i = 0; i < ggml_backend_reg_count(); i++) {
- ggml_backend_reg_t reg = ggml_backend_reg_get(i);
- ggml_backend_reg_set_log_callback(reg, log_callback, user_data);
- }
-}
-
// Convenience functions
ggml_backend_t ggml_backend_init_by_name(const char * name, const char * params) {
ggml_backend_dev_t dev = ggml_backend_dev_by_name(name);
/* .get_device_count = */ ggml_backend_cpu_reg_get_device_count,
/* .get_device = */ ggml_backend_cpu_reg_get_device,
/* .get_proc_address = */ NULL,
- /* .set_log_callback = */ NULL,
};
ggml_backend_reg_t ggml_backend_cpu_reg(void) {
#include "ggml-common.h"
-/**
- * @brief Default logging callback for GGML.
- *
- * This function is the default logging callback that logs messages to stderr.
- *
- * @param level The log level.
- * @param msg The log message.
- * @param user_data User data passed to the callback.
- */
-static void ggml_cann_default_log_callback(enum ggml_log_level level,
- const char* msg, void* user_data) {
- GGML_UNUSED(level);
- GGML_UNUSED(user_data);
- fprintf(stderr, "%s", msg);
-}
-
-ggml_log_callback ggml_cann_log_callback = ggml_cann_default_log_callback;
-void* ggml_cann_log_user_data = NULL;
-
-GGML_API void ggml_backend_cann_log_set_callback(ggml_log_callback log_callback,
- void* user_data) {
- ggml_cann_log_callback = log_callback;
- ggml_cann_log_user_data = user_data;
-}
-
-#define GGML_CANN_LOG_INFO(...) ggml_cann_log(GGML_LOG_LEVEL_INFO, __VA_ARGS__)
-#define GGML_CANN_LOG_WARN(...) ggml_cann_log(GGML_LOG_LEVEL_WARN, __VA_ARGS__)
-#define GGML_CANN_LOG_ERROR(...) \
- ggml_cann_log(GGML_LOG_LEVEL_ERROR, __VA_ARGS__)
-
-GGML_ATTRIBUTE_FORMAT(2, 3)
-
-/**
- * @brief Log a message using the current logging callback.
- *
- * This function formats a log message and passes it to the current logging
- * callback.
- *
- * @param level The log level.
- * @param format The format string for the log message.
- * @param ... The arguments for the format string.
- */
-static void ggml_cann_log(enum ggml_log_level level, const char* format, ...) {
- if (ggml_cann_log_callback != NULL) {
- va_list args;
- va_start(args, format);
- char buffer[128];
- int len = vsnprintf(buffer, 128, format, args);
- if (len < 128) {
- ggml_cann_log_callback(level, buffer, ggml_cann_log_user_data);
- } else {
- // vsnprintf adds a null terminator
- std::vector<char> buffer2(len + 1);
- va_end(args);
- va_start(args, format);
- vsnprintf(&buffer2[0], buffer2.size(), format, args);
- ggml_cann_log_callback(level, buffer2.data(),
- ggml_cann_log_user_data);
- }
- va_end(args);
- }
-}
-
/**
* @brief Handles CANN errors by printing an error message and aborting.
*
int32_t id = -1;
aclrtGetDevice(&id);
- GGML_CANN_LOG_ERROR("CANN error: %s\n", msg);
- GGML_CANN_LOG_ERROR(" current device: %d, in function %s at %s:%d\n", id, func,
+ GGML_LOG_ERROR("CANN error: %s\n", msg);
+ GGML_LOG_ERROR(" current device: %d, in function %s at %s:%d\n", id, func,
file, line);
- GGML_CANN_LOG_ERROR(" %s\n", stmt);
+ GGML_LOG_ERROR(" %s\n", stmt);
    // abort with GGML_ABORT to get a stack trace
GGML_ABORT("CANN error");
}
aclError err = aclrtGetDeviceCount((uint32_t*)&info.device_count);
if (err != ACL_SUCCESS) {
- GGML_CANN_LOG_ERROR("%s: failed to initialize CANN: %s\n",
+ GGML_LOG_ERROR("%s: failed to initialize CANN: %s\n",
__func__, aclGetRecentErrMsg());
return info;
}
*actual_size = look_ahead_size;
pool_size += look_ahead_size;
#ifdef DEBUG_CANN_MALLOC
- GGML_CANN_LOG_INFO(
+ GGML_LOG_INFO(
"%s[%d]: %d buffers, max_size = %u MB, pool_size = %u MB, "
"requested %u MB\n",
__func__, device, nnz, (uint32_t)(max_size / 1024 / 1024),
// add to the pool
pool_size += reserve_size;
- // GGML_CANN_LOG_INFO("cann pool[%d]: size increased to %llu MB (
+ // GGML_LOG_INFO("cann pool[%d]: size increased to %llu MB (
// reserved %llu MB)\n",
// device, (unsigned long long) (pool_size/1024/1024),
// (unsigned long long) (reserve_size/1024/1024));
pool_used += size;
#ifdef DEBUG_CANN_MALLOC
- GGML_CANN_LOG_INFO("cann pool[%d]: allocated %llu bytes at %llx\n", device,
+ GGML_LOG_INFO("cann pool[%d]: allocated %llu bytes at %llx\n", device,
(unsigned long long)size, (unsigned long long)ptr);
#endif
return ptr;
*/
void free(void* ptr, size_t size) override {
#ifdef DEBUG_CANN_MALLOC
- GGML_CANN_LOG_INFO("cann pool[%d]: freed %llu bytes at %llx\n", device,
+ GGML_LOG_INFO("cann pool[%d]: freed %llu bytes at %llx\n", device,
(unsigned long long)size, (unsigned long long)ptr);
#endif
void* dev_ptr;
aclError err = aclrtMalloc(&dev_ptr, size, ACL_MEM_MALLOC_HUGE_FIRST);
if (err != ACL_SUCCESS) {
- GGML_CANN_LOG_ERROR(
+ GGML_LOG_ERROR(
"%s: allocating %.2f MiB on device %d: aclrtMalloc failed: %s\n",
__func__, size / 1024.0 / 1024.0, buft_ctx->device,
aclGetRecentErrMsg());
aclError err = aclrtMallocHost((void **) &hostPtr, size);
if (err != ACL_SUCCESS) {
- GGML_CANN_LOG_WARN("%s: failed to allocate %.2f MiB of pinned memory: %s\n", __func__,
+ GGML_LOG_WARN("%s: failed to allocate %.2f MiB of pinned memory: %s\n", __func__,
size / 1024.0 / 1024.0, aclGetRecentErrMsg());
return nullptr;
}
bool ok = ggml_cann_compute_forward(*cann_ctx, node);
if (!ok) {
- GGML_CANN_LOG_ERROR("%s: error: op not supported %s (%s)\n", __func__,
+ GGML_LOG_ERROR("%s: error: op not supported %s (%s)\n", __func__,
node->name, ggml_op_name(node->op));
}
GGML_ASSERT(ok);
ggml_backend_t ggml_backend_cann_init(int32_t device) {
aclInit(nullptr);
if (device < 0 || device >= ggml_backend_cann_get_device_count()) {
- GGML_CANN_LOG_ERROR("%s: error: invalid device %d\n", __func__, device);
+ GGML_LOG_ERROR("%s: error: invalid device %d\n", __func__, device);
return nullptr;
}
ggml_backend_cann_context* ctx = new ggml_backend_cann_context(device);
if (ctx == nullptr) {
- GGML_CANN_LOG_ERROR("%s: error: failed to allocate context\n", __func__);
+ GGML_LOG_ERROR("%s: error: failed to allocate context\n", __func__);
return nullptr;
}
ggml_cann_set_device(ctx->device);
static_assert(sizeof(half) == sizeof(ggml_fp16_t), "wrong fp16 size");
-static void ggml_cuda_default_log_callback(enum ggml_log_level level, const char * msg, void * user_data) {
- GGML_UNUSED(level);
- GGML_UNUSED(user_data);
- fprintf(stderr, "%s", msg);
-}
-
-ggml_log_callback ggml_cuda_log_callback = ggml_cuda_default_log_callback;
-void * ggml_cuda_log_user_data = NULL;
-
-GGML_API void ggml_backend_cuda_log_set_callback(ggml_log_callback log_callback, void * user_data) {
- ggml_cuda_log_callback = log_callback;
- ggml_cuda_log_user_data = user_data;
-}
-
-#define GGML_CUDA_LOG_INFO(...) ggml_cuda_log(GGML_LOG_LEVEL_INFO, __VA_ARGS__)
-#define GGML_CUDA_LOG_WARN(...) ggml_cuda_log(GGML_LOG_LEVEL_WARN, __VA_ARGS__)
-#define GGML_CUDA_LOG_ERROR(...) ggml_cuda_log(GGML_LOG_LEVEL_ERROR, __VA_ARGS__)
-
-GGML_ATTRIBUTE_FORMAT(2, 3)
-static void ggml_cuda_log(enum ggml_log_level level, const char * format, ...) {
- if (ggml_cuda_log_callback != NULL) {
- va_list args;
- va_start(args, format);
- char buffer[128];
- int len = vsnprintf(buffer, 128, format, args);
- if (len < 128) {
- ggml_cuda_log_callback(level, buffer, ggml_cuda_log_user_data);
- } else {
- std::vector<char> buffer2(len + 1); // vsnprintf adds a null terminator
- va_end(args);
- va_start(args, format);
- vsnprintf(&buffer2[0], buffer2.size(), format, args);
- ggml_cuda_log_callback(level, buffer2.data(), ggml_cuda_log_user_data);
- }
- va_end(args);
- }
-}
-
[[noreturn]]
void ggml_cuda_error(const char * stmt, const char * func, const char * file, int line, const char * msg) {
int id = -1; // in case cudaGetDevice fails
cudaGetDevice(&id);
- GGML_CUDA_LOG_ERROR(GGML_CUDA_NAME " error: %s\n", msg);
- GGML_CUDA_LOG_ERROR(" current device: %d, in function %s at %s:%d\n", id, func, file, line);
- GGML_CUDA_LOG_ERROR(" %s\n", stmt);
+ GGML_LOG_ERROR(GGML_CUDA_NAME " error: %s\n", msg);
+ GGML_LOG_ERROR(" current device: %d, in function %s at %s:%d\n", id, func, file, line);
+ GGML_LOG_ERROR(" %s\n", stmt);
// abort with GGML_ABORT to get a stack trace
GGML_ABORT(GGML_CUDA_NAME " error");
}
cudaError_t err = cudaGetDeviceCount(&info.device_count);
if (err != cudaSuccess) {
- GGML_CUDA_LOG_ERROR("%s: failed to initialize " GGML_CUDA_NAME ": %s\n", __func__, cudaGetErrorString(err));
+ GGML_LOG_ERROR("%s: failed to initialize " GGML_CUDA_NAME ": %s\n", __func__, cudaGetErrorString(err));
return info;
}
int64_t total_vram = 0;
#ifdef GGML_CUDA_FORCE_MMQ
- GGML_CUDA_LOG_INFO("%s: GGML_CUDA_FORCE_MMQ: yes\n", __func__);
+ GGML_LOG_INFO("%s: GGML_CUDA_FORCE_MMQ: yes\n", __func__);
#else
- GGML_CUDA_LOG_INFO("%s: GGML_CUDA_FORCE_MMQ: no\n", __func__);
+ GGML_LOG_INFO("%s: GGML_CUDA_FORCE_MMQ: no\n", __func__);
#endif // GGML_CUDA_FORCE_MMQ
#ifdef GGML_CUDA_FORCE_CUBLAS
- GGML_CUDA_LOG_INFO("%s: GGML_CUDA_FORCE_CUBLAS: yes\n", __func__);
+ GGML_LOG_INFO("%s: GGML_CUDA_FORCE_CUBLAS: yes\n", __func__);
#else
- GGML_CUDA_LOG_INFO("%s: GGML_CUDA_FORCE_CUBLAS: no\n", __func__);
+ GGML_LOG_INFO("%s: GGML_CUDA_FORCE_CUBLAS: no\n", __func__);
#endif // GGML_CUDA_FORCE_CUBLAS
- GGML_CUDA_LOG_INFO("%s: found %d " GGML_CUDA_NAME " devices:\n", __func__, info.device_count);
+ GGML_LOG_INFO("%s: found %d " GGML_CUDA_NAME " devices:\n", __func__, info.device_count);
for (int id = 0; id < info.device_count; ++id) {
int device_vmm = 0;
cudaDeviceProp prop;
CUDA_CHECK(cudaGetDeviceProperties(&prop, id));
- GGML_CUDA_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s\n", id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no");
+ GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s\n", id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no");
info.default_tensor_split[id] = total_vram;
total_vram += prop.totalGlobalMem;
*actual_size = look_ahead_size;
pool_size += look_ahead_size;
#ifdef DEBUG_CUDA_MALLOC
- GGML_CUDA_LOG_INFO("%s[%d]: %d buffers, max_size = %u MB, pool_size = %u MB, requested %u MB\n", __func__, device, nnz,
+ GGML_LOG_INFO("%s[%d]: %d buffers, max_size = %u MB, pool_size = %u MB, requested %u MB\n", __func__, device, nnz,
(uint32_t)(max_size / 1024 / 1024), (uint32_t)(pool_size / 1024 / 1024), (uint32_t)(size / 1024 / 1024));
#endif
return ptr;
return;
}
}
- GGML_CUDA_LOG_WARN(GGML_CUDA_NAME " buffer pool full, increase MAX_CUDA_BUFFERS\n");
+ GGML_LOG_WARN(GGML_CUDA_NAME " buffer pool full, increase MAX_CUDA_BUFFERS\n");
ggml_cuda_set_device(device);
CUDA_CHECK(cudaFree(ptr));
pool_size -= size;
if (err != cudaSuccess) {
// clear the error
cudaGetLastError();
- GGML_CUDA_LOG_ERROR("%s: allocating %.2f MiB on device %d: cudaMalloc failed: %s\n", __func__, size / 1024.0 / 1024.0, buft_ctx->device, cudaGetErrorString(err));
+ GGML_LOG_ERROR("%s: allocating %.2f MiB on device %d: cudaMalloc failed: %s\n", __func__, size / 1024.0 / 1024.0, buft_ctx->device, cudaGetErrorString(err));
return nullptr;
}
if (err != cudaSuccess) {
// clear the error
cudaGetLastError();
- GGML_CUDA_LOG_WARN("%s: failed to allocate %.2f MiB of pinned memory: %s\n", __func__,
+ GGML_LOG_WARN("%s: failed to allocate %.2f MiB of pinned memory: %s\n", __func__,
size / 1024.0 / 1024.0, cudaGetErrorString(err));
return nullptr;
}
break;
case GGML_OP_MUL_MAT:
if (dst->src[0]->ne[3] != dst->src[1]->ne[3]) {
- GGML_CUDA_LOG_ERROR("%s: cannot compute %s: src0->ne[3] = %" PRId64 ", src1->ne[3] = %" PRId64 " - fallback to CPU\n", __func__, dst->name, dst->src[0]->ne[3], dst->src[1]->ne[3]);
+ GGML_LOG_ERROR("%s: cannot compute %s: src0->ne[3] = %" PRId64 ", src1->ne[3] = %" PRId64 " - fallback to CPU\n", __func__, dst->name, dst->src[0]->ne[3], dst->src[1]->ne[3]);
return false;
} else {
ggml_cuda_mul_mat(ctx, dst->src[0], dst->src[1], dst);
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
- GGML_CUDA_LOG_ERROR("%s: %s failed\n", __func__, ggml_op_desc(dst));
+ GGML_LOG_ERROR("%s: %s failed\n", __func__, ggml_op_desc(dst));
CUDA_CHECK(err);
}
if (cuda_ctx_src->device != buf_ctx_src->device || cuda_ctx_dst->device != buf_ctx_dst->device) {
#ifndef NDEBUG
- GGML_CUDA_LOG_WARN("%s: backend and buffer devices do not match\n", __func__);
+ GGML_LOG_WARN("%s: backend and buffer devices do not match\n", __func__);
#endif
return false;
}
if (ggml_cuda_info().devices[cuda_ctx->device].cc < CC_AMPERE) {
cuda_ctx->cuda_graph->disable_due_to_gpu_arch = true;
#ifndef NDEBUG
- GGML_CUDA_LOG_WARN("%s: disabling CUDA graphs due to GPU architecture\n", __func__);
+ GGML_LOG_WARN("%s: disabling CUDA graphs due to GPU architecture\n", __func__);
#endif
}
}
if (node->src[0] && node->src[0]->buffer && ggml_backend_buffer_is_cuda_split(node->src[0]->buffer)) {
use_cuda_graph = false; // Split buffers are not supported by CUDA graph capture
#ifndef NDEBUG
- GGML_CUDA_LOG_WARN("%s: disabling CUDA graphs due to split buffer\n", __func__);
+ GGML_LOG_WARN("%s: disabling CUDA graphs due to split buffer\n", __func__);
#endif
}
if (node->op == GGML_OP_MUL_MAT_ID) {
use_cuda_graph = false; // This node type is not supported by CUDA graph capture
#ifndef NDEBUG
- GGML_CUDA_LOG_WARN("%s: disabling CUDA graphs due to mul_mat_id\n", __func__);
+ GGML_LOG_WARN("%s: disabling CUDA graphs due to mul_mat_id\n", __func__);
#endif
}
// Changes in batch size or context size can cause changes to the grid size of some kernels.
use_cuda_graph = false;
#ifndef NDEBUG
- GGML_CUDA_LOG_WARN("%s: disabling CUDA graphs due to batch size > 1 [%s] [%ld %ld %ld %ld]\n", __func__, node->name, node->ne[0], node->ne[1], node->ne[2], node->ne[3]);
+ GGML_LOG_WARN("%s: disabling CUDA graphs due to batch size > 1 [%s] [%ld %ld %ld %ld]\n", __func__, node->name, node->ne[0], node->ne[1], node->ne[2], node->ne[3]);
#endif
}
if (!ptr) {
use_cuda_graph = false;
#ifndef NDEBUG
- GGML_CUDA_LOG_WARN("%s: disabling CUDA graphs due to unsupported copy op\n", __func__);
+ GGML_LOG_WARN("%s: disabling CUDA graphs due to unsupported copy op\n", __func__);
#endif
} else {
if (std::find(ggml_cuda_cpy_fn_ptrs.begin(), ggml_cuda_cpy_fn_ptrs.end(), ptr) == ggml_cuda_cpy_fn_ptrs.end()) {
if (cuda_ctx->cuda_graph->number_consecutive_updates >= 4) {
cuda_ctx->cuda_graph->disable_due_to_too_many_updates = true;
#ifndef NDEBUG
- GGML_CUDA_LOG_WARN("%s: disabling CUDA graphs due to too many consecutive updates\n", __func__);
+ GGML_LOG_WARN("%s: disabling CUDA graphs due to too many consecutive updates\n", __func__);
#endif
}
}
bool ok = ggml_cuda_compute_forward(*cuda_ctx, node);
if (!ok) {
- GGML_CUDA_LOG_ERROR("%s: op not supported %s (%s)\n", __func__, node->name, ggml_op_name(node->op));
+ GGML_LOG_ERROR("%s: op not supported %s (%s)\n", __func__, node->name, ggml_op_name(node->op));
}
GGML_ASSERT(ok);
}
use_cuda_graph = false;
cuda_ctx->cuda_graph->disable_due_to_failed_graph_capture = true;
#ifndef NDEBUG
- GGML_CUDA_LOG_WARN("%s: disabling CUDA graphs due to failed graph capture\n", __func__);
+ GGML_LOG_WARN("%s: disabling CUDA graphs due to failed graph capture\n", __func__);
#endif
} else {
graph_evaluated_or_captured = true; // CUDA graph has been captured
cudaError_t stat = cudaGraphExecUpdate(cuda_ctx->cuda_graph->instance, cuda_ctx->cuda_graph->graph, &result_info);
if (stat == cudaErrorGraphExecUpdateFailure) {
#ifndef NDEBUG
- GGML_CUDA_LOG_ERROR("%s: CUDA graph update failed\n", __func__);
+ GGML_LOG_ERROR("%s: CUDA graph update failed\n", __func__);
#endif
// The pre-existing graph exec cannot be updated due to violated constraints
// so instead clear error and re-instantiate
// clear the error
cudaGetLastError();
- GGML_CUDA_LOG_WARN("%s: failed to register %.2f MiB of pinned memory: %s\n", __func__,
+ GGML_LOG_WARN("%s: failed to register %.2f MiB of pinned memory: %s\n", __func__,
size / 1024.0 / 1024.0, cudaGetErrorString(err));
return false;
}
return nullptr;
}
-static void ggml_backend_cuda_reg_set_log_callback(ggml_backend_reg_t reg, ggml_log_callback log_callback, void * user_data) {
- GGML_UNUSED(reg);
- ggml_backend_cuda_log_set_callback(log_callback, user_data);
-}
-
static const ggml_backend_reg_i ggml_backend_cuda_reg_interface = {
/* .get_name = */ ggml_backend_cuda_reg_get_name,
/* .get_device_count = */ ggml_backend_cuda_reg_get_device_count,
    /* .get_device = */ ggml_backend_cuda_reg_get_device,
/* .get_proc_address = */ ggml_backend_cuda_reg_get_proc_address,
- /* .set_log_callback = */ ggml_backend_cuda_reg_set_log_callback,
};
// backend registry
ggml_backend_t ggml_backend_cuda_init(int device) {
if (device < 0 || device >= ggml_backend_cuda_get_device_count()) {
- GGML_CUDA_LOG_ERROR("%s: invalid device %d\n", __func__, device);
+ GGML_LOG_ERROR("%s: invalid device %d\n", __func__, device);
return nullptr;
}
ggml_backend_cuda_context * ctx = new ggml_backend_cuda_context(device);
if (ctx == nullptr) {
- GGML_CUDA_LOG_ERROR("%s: failed to allocate context\n", __func__);
+ GGML_LOG_ERROR("%s: failed to allocate context\n", __func__);
return nullptr;
}
#endif
#endif
+//
+// logging
+//
+
+GGML_ATTRIBUTE_FORMAT(2, 3)
+void ggml_log_internal (enum ggml_log_level level, const char * format, ...);
+void ggml_log_callback_default(enum ggml_log_level level, const char * text, void * user_data);
+
+#define GGML_LOG(...) ggml_log_internal(GGML_LOG_LEVEL_NONE , __VA_ARGS__)
+#define GGML_LOG_INFO(...) ggml_log_internal(GGML_LOG_LEVEL_INFO , __VA_ARGS__)
+#define GGML_LOG_WARN(...) ggml_log_internal(GGML_LOG_LEVEL_WARN , __VA_ARGS__)
+#define GGML_LOG_ERROR(...) ggml_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__)
+#define GGML_LOG_DEBUG(...) ggml_log_internal(GGML_LOG_LEVEL_DEBUG, __VA_ARGS__)
+#define GGML_LOG_CONT(...) ggml_log_internal(GGML_LOG_LEVEL_CONT , __VA_ARGS__)
+
// bitset
typedef uint32_t ggml_bitset_t;
// max number of MTLCommandBuffer used to submit a graph for processing
#define GGML_METAL_MAX_COMMAND_BUFFERS 8
-#ifdef GGML_METAL_NDEBUG
-#define GGML_METAL_LOG(...)
-#define GGML_METAL_LOG_INFO(...)
-#define GGML_METAL_LOG_WARN(...)
-#define GGML_METAL_LOG_ERROR(...)
-#else
-#define GGML_METAL_LOG(...) ggml_metal_log(GGML_LOG_LEVEL_NONE, __VA_ARGS__)
-#define GGML_METAL_LOG_INFO(...) ggml_metal_log(GGML_LOG_LEVEL_INFO, __VA_ARGS__)
-#define GGML_METAL_LOG_WARN(...) ggml_metal_log(GGML_LOG_LEVEL_WARN, __VA_ARGS__)
-#define GGML_METAL_LOG_ERROR(...) ggml_metal_log(GGML_LOG_LEVEL_ERROR, __VA_ARGS__)
-#define GGML_METAL_LOG_DEBUG(...) ggml_metal_log(GGML_LOG_LEVEL_DEBUG, __VA_ARGS__)
-#endif
-
#define UNUSED(x) (void)(x)
struct ggml_metal_kernel {
@implementation GGMLMetalClass
@end
-static void ggml_metal_default_log_callback(enum ggml_log_level level, const char * msg, void * user_data) {
- fprintf(stderr, "%s", msg);
-
- UNUSED(level);
- UNUSED(user_data);
-}
-
-ggml_log_callback ggml_metal_log_callback = ggml_metal_default_log_callback;
-void * ggml_metal_log_user_data = NULL;
-
-GGML_ATTRIBUTE_FORMAT(2, 3)
-static void ggml_metal_log(enum ggml_log_level level, const char * format, ...){
- if (ggml_metal_log_callback != NULL) {
- va_list args;
- va_start(args, format);
- char buffer[128];
- int len = vsnprintf(buffer, 128, format, args);
- if (len < 128) {
- ggml_metal_log_callback(level, buffer, ggml_metal_log_user_data);
- } else {
- char* buffer2 = malloc(len+1);
- va_end(args);
- va_start(args, format);
- vsnprintf(buffer2, len+1, format, args);
- buffer2[len] = 0;
- ggml_metal_log_callback(level, buffer2, ggml_metal_log_user_data);
- free(buffer2);
- }
- va_end(args);
- }
-}
-
static void * ggml_metal_host_malloc(size_t n) {
void * data = NULL;
#if TARGET_OS_OSX
kern_return_t err = vm_allocate((vm_map_t) mach_task_self(), (void *) &data, n, VM_FLAGS_ANYWHERE);
if (err != KERN_SUCCESS) {
- GGML_METAL_LOG_ERROR("%s: error: vm_allocate failed\n", __func__);
+ GGML_LOG_ERROR("%s: error: vm_allocate failed\n", __func__);
return NULL;
}
#else
const int result = posix_memalign((void **) &data, sysconf(_SC_PAGESIZE), n);
if (result != 0) {
- GGML_METAL_LOG_ERROR("%s: error: posix_memalign failed\n", __func__);
+ GGML_LOG_ERROR("%s: error: posix_memalign failed\n", __func__);
return NULL;
}
#endif
}
static struct ggml_backend_metal_context * ggml_metal_init(void) {
- GGML_METAL_LOG_INFO("%s: allocating\n", __func__);
+ GGML_LOG_INFO("%s: allocating\n", __func__);
#if TARGET_OS_OSX && !GGML_METAL_NDEBUG
// Show all the Metal device instances in the system
NSArray * devices = MTLCopyAllDevices();
for (id<MTLDevice> device in devices) {
- GGML_METAL_LOG_INFO("%s: found device: %s\n", __func__, [[device name] UTF8String]);
+ GGML_LOG_INFO("%s: found device: %s\n", __func__, [[device name] UTF8String]);
}
[devices release]; // since it was created by a *Copy* C method
#endif
// Pick and show default Metal device
id<MTLDevice> device = MTLCreateSystemDefaultDevice();
- GGML_METAL_LOG_INFO("%s: picking default device: %s\n", __func__, [[device name] UTF8String]);
+ GGML_LOG_INFO("%s: picking default device: %s\n", __func__, [[device name] UTF8String]);
// Configure context
struct ggml_backend_metal_context * ctx = calloc(1, sizeof(struct ggml_backend_metal_context));
if (try_metallib && path_lib != nil) {
// pre-compiled library found
NSURL * libURL = [NSURL fileURLWithPath:path_lib];
- GGML_METAL_LOG_INFO("%s: loading '%s'\n", __func__, [path_lib UTF8String]);
+ GGML_LOG_INFO("%s: loading '%s'\n", __func__, [path_lib UTF8String]);
metal_library = [ctx->device newLibraryWithURL:libURL error:&error];
if (error) {
- GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
+ GGML_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
return NULL;
}
} else {
#if GGML_METAL_EMBED_LIBRARY
- GGML_METAL_LOG_INFO("%s: using embedded metal library\n", __func__);
+ GGML_LOG_INFO("%s: using embedded metal library\n", __func__);
extern const char ggml_metallib_start[];
extern const char ggml_metallib_end[];
NSString * src = [[NSString alloc] initWithBytes:ggml_metallib_start length:(ggml_metallib_end-ggml_metallib_start) encoding:NSUTF8StringEncoding];
#else
- GGML_METAL_LOG_INFO("%s: default.metallib not found, loading from source\n", __func__);
+ GGML_LOG_INFO("%s: default.metallib not found, loading from source\n", __func__);
NSString * path_source;
NSString * path_resource = [[NSProcessInfo processInfo].environment objectForKey:@"GGML_METAL_PATH_RESOURCES"];
- GGML_METAL_LOG_INFO("%s: GGML_METAL_PATH_RESOURCES = %s\n", __func__, path_resource ? [path_resource UTF8String] : "nil");
+ GGML_LOG_INFO("%s: GGML_METAL_PATH_RESOURCES = %s\n", __func__, path_resource ? [path_resource UTF8String] : "nil");
if (path_resource) {
path_source = [path_resource stringByAppendingPathComponent:@"ggml-metal.metal"];
}
if (path_source == nil) {
- GGML_METAL_LOG_WARN("%s: error: could not use bundle path to find ggml-metal.metal, falling back to trying cwd\n", __func__);
+ GGML_LOG_WARN("%s: error: could not use bundle path to find ggml-metal.metal, falling back to trying cwd\n", __func__);
path_source = @"ggml-metal.metal";
}
- GGML_METAL_LOG_INFO("%s: loading '%s'\n", __func__, [path_source UTF8String]);
+ GGML_LOG_INFO("%s: loading '%s'\n", __func__, [path_source UTF8String]);
NSString * src = [NSString stringWithContentsOfFile:path_source encoding:NSUTF8StringEncoding error:&error];
if (error) {
- GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
+ GGML_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
return NULL;
}
#endif // GGML_METAL_EMBED_LIBRARY
metal_library = [ctx->device newLibraryWithSource:src options:options error:&error];
if (error) {
- GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
+ GGML_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
return NULL;
}
}
}
// print MTL GPU family:
- GGML_METAL_LOG_INFO("%s: GPU name: %s\n", __func__, [[ctx->device name] UTF8String]);
+ GGML_LOG_INFO("%s: GPU name: %s\n", __func__, [[ctx->device name] UTF8String]);
const NSInteger MTLGPUFamilyMetal3 = 5001;
{
for (int i = MTLGPUFamilyApple1 + 20; i >= MTLGPUFamilyApple1; --i) {
if ([ctx->device supportsFamily:i]) {
- GGML_METAL_LOG_INFO("%s: GPU family: MTLGPUFamilyApple%d (%d)\n", __func__, i - (int) MTLGPUFamilyApple1 + 1, i);
+ GGML_LOG_INFO("%s: GPU family: MTLGPUFamilyApple%d (%d)\n", __func__, i - (int) MTLGPUFamilyApple1 + 1, i);
break;
}
}
for (int i = MTLGPUFamilyCommon1 + 5; i >= MTLGPUFamilyCommon1; --i) {
if ([ctx->device supportsFamily:i]) {
- GGML_METAL_LOG_INFO("%s: GPU family: MTLGPUFamilyCommon%d (%d)\n", __func__, i - (int) MTLGPUFamilyCommon1 + 1, i);
+ GGML_LOG_INFO("%s: GPU family: MTLGPUFamilyCommon%d (%d)\n", __func__, i - (int) MTLGPUFamilyCommon1 + 1, i);
break;
}
}
for (int i = MTLGPUFamilyMetal3 + 5; i >= MTLGPUFamilyMetal3; --i) {
if ([ctx->device supportsFamily:i]) {
- GGML_METAL_LOG_INFO("%s: GPU family: MTLGPUFamilyMetal%d (%d)\n", __func__, i - (int) MTLGPUFamilyMetal3 + 3, i);
+ GGML_LOG_INFO("%s: GPU family: MTLGPUFamilyMetal%d (%d)\n", __func__, i - (int) MTLGPUFamilyMetal3 + 3, i);
break;
}
}
ctx->support_simdgroup_mm = [ctx->device supportsFamily:MTLGPUFamilyApple7];
- GGML_METAL_LOG_INFO("%s: simdgroup reduction support = %s\n", __func__, ctx->support_simdgroup_reduction ? "true" : "false");
- GGML_METAL_LOG_INFO("%s: simdgroup matrix mul. support = %s\n", __func__, ctx->support_simdgroup_mm ? "true" : "false");
- GGML_METAL_LOG_INFO("%s: hasUnifiedMemory = %s\n", __func__, ctx->device.hasUnifiedMemory ? "true" : "false");
+ GGML_LOG_INFO("%s: simdgroup reduction support = %s\n", __func__, ctx->support_simdgroup_reduction ? "true" : "false");
+ GGML_LOG_INFO("%s: simdgroup matrix mul. support = %s\n", __func__, ctx->support_simdgroup_mm ? "true" : "false");
+ GGML_LOG_INFO("%s: hasUnifiedMemory = %s\n", __func__, ctx->device.hasUnifiedMemory ? "true" : "false");
ctx->capture_next_compute = false;
ctx->capture_started = false;
#if TARGET_OS_OSX || (TARGET_OS_IOS && __clang_major__ >= 15)
if (@available(macOS 10.12, iOS 16.0, *)) {
- GGML_METAL_LOG_INFO("%s: recommendedMaxWorkingSetSize = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1e6);
+ GGML_LOG_INFO("%s: recommendedMaxWorkingSetSize = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1e6);
}
#elif TARGET_OS_OSX
if (ctx->device.maxTransferRate != 0) {
- GGML_METAL_LOG_INFO("%s: maxTransferRate = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1e6);
+ GGML_LOG_INFO("%s: maxTransferRate = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1e6);
} else {
- GGML_METAL_LOG_INFO("%s: maxTransferRate = built-in GPU\n", __func__);
+ GGML_LOG_INFO("%s: maxTransferRate = built-in GPU\n", __func__);
}
#endif
}
/*
- GGML_METAL_LOG_INFO("%s: loaded %-40s %16p | th_max = %4d | th_width = %4d\n", __func__, "kernel_"#name, (void *) kernel->pipeline, \
+ GGML_LOG_INFO("%s: loaded %-40s %16p | th_max = %4d | th_width = %4d\n", __func__, "kernel_"#name, (void *) kernel->pipeline, \
(int) kernel->pipeline.maxTotalThreadsPerThreadgroup, \
(int) kernel->pipeline.threadExecutionWidth); \
*/
kernel->pipeline = [ctx->device newComputePipelineStateWithFunction:metal_function error:&error]; \
[metal_function release]; \
if (error) { \
- GGML_METAL_LOG_ERROR("%s: error: load pipeline error: %s\n", __func__, [[error description] UTF8String]); \
+ GGML_LOG_ERROR("%s: error: load pipeline error: %s\n", __func__, [[error description] UTF8String]); \
[metal_library release]; \
return NULL; \
} \
} else { \
- GGML_METAL_LOG_WARN("%s: skipping %-40s (not supported)\n", __func__, "kernel_"#name); \
+ GGML_LOG_WARN("%s: skipping %-40s (not supported)\n", __func__, "kernel_"#name); \
}
// simd_sum and simd_max requires MTLGPUFamilyApple7
}
static void ggml_metal_free(struct ggml_backend_metal_context * ctx) {
- GGML_METAL_LOG_INFO("%s: deallocating\n", __func__);
+ GGML_LOG_INFO("%s: deallocating\n", __func__);
for (int i = 0; i < GGML_METAL_KERNEL_TYPE_COUNT; ++i) {
[ctx->kernels[i].pipeline release];
// Metal buffer based on the host memory pointer
//
static id<MTLBuffer> ggml_metal_get_buffer(struct ggml_tensor * t, size_t * offs) {
- //GGML_METAL_LOG_INFO("%s: data tensor '%16s', offs_data = %8ld, offs_eval = %8ld, offs_cach = %8ld\n", __func__, t->name, offs_data, offs_eval, offs_cach);
+ //GGML_LOG_INFO("%s: data tensor '%16s', offs_data = %8ld, offs_eval = %8ld, offs_cach = %8ld\n", __func__, t->name, offs_data, offs_eval, offs_cach);
const int64_t tsize = ggml_nbytes(t);
for (int i = 0; i < buf_ctx->n_buffers; ++i) {
const int64_t ioffs = (int64_t) t->data - (int64_t) buf_ctx->buffers[i].data;
- //GGML_METAL_LOG_INFO("ioffs = %10ld, tsize = %10ld, sum = %10ld, buf_ctx->buffers[%d].size = %10ld\n", ioffs, tsize, ioffs + tsize, i, buf_ctx->buffers[i].size);
+ //GGML_LOG_INFO("ioffs = %10ld, tsize = %10ld, sum = %10ld, buf_ctx->buffers[%d].size = %10ld\n", ioffs, tsize, ioffs + tsize, i, buf_ctx->buffers[i].size);
if (ioffs >= 0 && ioffs + tsize <= (int64_t) buf_ctx->buffers[i].size) {
*offs = (size_t) ioffs;
- //GGML_METAL_LOG_INFO("%s: tensor '%16s', offs = %8ld\n", __func__, t->name, *offs);
+ //GGML_LOG_INFO("%s: tensor '%16s', offs = %8ld\n", __func__, t->name, *offs);
return buf_ctx->buffers[i].metal;
}
}
- GGML_METAL_LOG_ERROR("%s: error: tensor '%s' buffer is nil\n", __func__, t->name);
+ GGML_LOG_ERROR("%s: error: tensor '%s' buffer is nil\n", __func__, t->name);
return nil;
}
struct ggml_tensor * node = ggml_graph_node(gf, idx);
- //GGML_METAL_LOG_INFO("%s: encoding node %3d, op = %8s\n", __func__, idx, ggml_op_name(node->op));
+ //GGML_LOG_INFO("%s: encoding node %3d, op = %8s\n", __func__, idx, ggml_op_name(node->op));
struct ggml_tensor * src0 = node->src[0];
struct ggml_tensor * src1 = node->src[1];
}
if (!ggml_metal_supports_op(ctx, dst)) {
- GGML_METAL_LOG_ERROR("%s: error: unsupported op '%s'\n", __func__, ggml_op_desc(dst));
+ GGML_LOG_ERROR("%s: error: unsupported op '%s'\n", __func__, ggml_op_desc(dst));
GGML_ABORT("unsupported op");
}
id<MTLBuffer> id_src2 = src2 ? ggml_metal_get_buffer(src2, &offs_src2) : nil;
id<MTLBuffer> id_dst = dst ? ggml_metal_get_buffer(dst, &offs_dst) : nil;
- //GGML_METAL_LOG_INFO("%s: op - %s\n", __func__, ggml_op_name(dst->op));
+ //GGML_LOG_INFO("%s: op - %s\n", __func__, ggml_op_name(dst->op));
//if (src0) {
- // GGML_METAL_LOG_INFO("%s: src0 - %4s [%5lld, %5lld, %5lld], %d, %s\n", __func__, ggml_type_name(src0t), ne00, ne01, ne02,
+ // GGML_LOG_INFO("%s: src0 - %4s [%5lld, %5lld, %5lld], %d, %s\n", __func__, ggml_type_name(src0t), ne00, ne01, ne02,
// ggml_is_contiguous(src0), src0->name);
//}
//if (src1) {
- // GGML_METAL_LOG_INFO("%s: src1 - %4s [%5lld, %5lld, %5lld], %d, %s\n", __func__, ggml_type_name(src1t), ne10, ne11, ne12,
+ // GGML_LOG_INFO("%s: src1 - %4s [%5lld, %5lld, %5lld], %d, %s\n", __func__, ggml_type_name(src1t), ne10, ne11, ne12,
// ggml_is_contiguous(src1), src1->name);
//}
//if (dst) {
- // GGML_METAL_LOG_INFO("%s: dst - %4s [%5lld, %5lld, %5lld], 1, %s\n", __func__, ggml_type_name(dstt), ne0, ne1, ne2,
+ // GGML_LOG_INFO("%s: dst - %4s [%5lld, %5lld, %5lld], 1, %s\n", __func__, ggml_type_name(dstt), ne0, ne1, ne2,
// dst->name);
//}
} break;
default:
{
- GGML_METAL_LOG_WARN("%s: node %3d, op = %8s not implemented\n", __func__, idx, ggml_op_name(dst->op));
+ GGML_LOG_WARN("%s: node %3d, op = %8s not implemented\n", __func__, idx, ggml_op_name(dst->op));
GGML_ABORT("fatal error");
}
} break;
} break;
default:
{
- GGML_METAL_LOG_ERROR("Asserting on type %d\n", (int)src0t);
+ GGML_LOG_ERROR("Asserting on type %d\n", (int)src0t);
GGML_ABORT("not implemented");
}
};
} break;
default:
{
- GGML_METAL_LOG_ERROR("Asserting on type %d\n", (int)src2t);
+ GGML_LOG_ERROR("Asserting on type %d\n", (int)src2t);
GGML_ABORT("not implemented");
}
};
//case 256: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_F16_H256].pipeline; break;
default:
{
- GGML_METAL_LOG_ERROR("unsupported size: %lld\n", ne00);
- GGML_METAL_LOG_ERROR("add template specialization for this size\n");
+ GGML_LOG_ERROR("unsupported size: %lld\n", ne00);
+ GGML_LOG_ERROR("add template specialization for this size\n");
GGML_ABORT("add template specialization for this size");
}
}
//case 256: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_F16_H256].pipeline; break;
default:
{
- GGML_METAL_LOG_ERROR("unsupported size: %lld\n", ne00);
- GGML_METAL_LOG_ERROR("add template specialization for this size\n");
+ GGML_LOG_ERROR("unsupported size: %lld\n", ne00);
+ GGML_LOG_ERROR("add template specialization for this size\n");
GGML_ABORT("add template specialization for this size");
}
}
} break;
default:
{
- GGML_METAL_LOG_ERROR("%s: error: node %3d, op = %8s not implemented\n", __func__, idx, ggml_op_name(dst->op));
+ GGML_LOG_ERROR("%s: error: node %3d, op = %8s not implemented\n", __func__, idx, ggml_op_name(dst->op));
GGML_ABORT("fatal error");
}
}
NSError * error = nil;
if (![[MTLCaptureManager sharedCaptureManager] startCaptureWithDescriptor:descriptor error:&error]) {
- GGML_METAL_LOG_ERROR("%s: error: unable to start capture '%s'\n", __func__, [[error localizedDescription] UTF8String]);
+ GGML_LOG_ERROR("%s: error: unable to start capture '%s'\n", __func__, [[error localizedDescription] UTF8String]);
GGML_ABORT("capture failed");
} else {
[ctx->capture_scope beginScope];
MTLCommandBufferStatus status = [command_buffer status];
if (status != MTLCommandBufferStatusCompleted) {
- GGML_METAL_LOG_INFO("%s: command buffer %d failed with status %lu\n", __func__, n_cb, status);
+ GGML_LOG_INFO("%s: command buffer %d failed with status %lu\n", __func__, n_cb, status);
if (status == MTLCommandBufferStatusError) {
- GGML_METAL_LOG_INFO("error: %s\n", [[command_buffer error].localizedDescription UTF8String]);
+ GGML_LOG_INFO("error: %s\n", [[command_buffer error].localizedDescription UTF8String]);
}
return GGML_STATUS_FAILED;
MTLCommandBufferStatus status = [command_buffer status];
if (status != MTLCommandBufferStatusCompleted) {
- GGML_METAL_LOG_INFO("%s: command buffer %d failed with status %lu\n", __func__, i, status);
+ GGML_LOG_INFO("%s: command buffer %d failed with status %lu\n", __func__, i, status);
if (status == MTLCommandBufferStatusError) {
- GGML_METAL_LOG_INFO("error: %s\n", [[command_buffer error].localizedDescription UTF8String]);
+ GGML_LOG_INFO("error: %s\n", [[command_buffer error].localizedDescription UTF8String]);
}
return GGML_STATUS_FAILED;
}
if (ctx->abort_callback && ctx->abort_callback(ctx->abort_callback_data)) {
- GGML_METAL_LOG_INFO("%s: command buffer %d aborted", __func__, i);
+ GGML_LOG_INFO("%s: command buffer %d aborted", __func__, i);
return GGML_STATUS_ABORTED;
}
#ifndef GGML_METAL_NDEBUG
#if TARGET_OS_OSX || (TARGET_OS_IOS && __clang_major__ >= 15)
if (@available(macOS 10.12, iOS 16.0, *)) {
- GGML_METAL_LOG_DEBUG("%s: allocated buffer, size = %8.2f MiB, (%8.2f / %8.2f)\n",
+ GGML_LOG_DEBUG("%s: allocated buffer, size = %8.2f MiB, (%8.2f / %8.2f)\n",
__func__,
size_aligned / 1024.0 / 1024.0,
device.currentAllocatedSize / 1024.0 / 1024.0,
device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
if (device.currentAllocatedSize > device.recommendedMaxWorkingSetSize) {
- GGML_METAL_LOG_WARN("%s: warning: current allocated size is greater than the recommended max working set size\n", __func__);
+ GGML_LOG_WARN("%s: warning: current allocated size is greater than the recommended max working set size\n", __func__);
}
} else {
- GGML_METAL_LOG_INFO("%s: allocated buffer, size = %8.2f MiB, (%8.2f)\n",
+ GGML_LOG_INFO("%s: allocated buffer, size = %8.2f MiB, (%8.2f)\n",
__func__,
size_aligned / 1024.0 / 1024.0,
device.currentAllocatedSize / 1024.0 / 1024.0);
}
if (size_aligned > 0 && (ctx->all_data == NULL || ctx->buffers[0].metal == nil)) {
- GGML_METAL_LOG_ERROR("%s: error: failed to allocate buffer, size = %8.2f MiB\n", __func__, size_aligned / 1024.0 / 1024.0);
+ GGML_LOG_ERROR("%s: error: failed to allocate buffer, size = %8.2f MiB\n", __func__, size_aligned / 1024.0 / 1024.0);
free(ctx);
ggml_backend_metal_free_device();
return NULL;
ctx->buffers[ctx->n_buffers].metal = [device newBufferWithBytesNoCopy:data length:size_aligned options:MTLResourceStorageModeShared deallocator:nil];
if (ctx->buffers[ctx->n_buffers].metal == nil) {
- GGML_METAL_LOG_ERROR("%s: error: failed to allocate buffer, size = %8.2f MiB\n", __func__, size_aligned / 1024.0 / 1024.0);
+ GGML_LOG_ERROR("%s: error: failed to allocate buffer, size = %8.2f MiB\n", __func__, size_aligned / 1024.0 / 1024.0);
return false;
}
}
ctx->buffers[ctx->n_buffers].metal = [device newBufferWithBytesNoCopy:(void *) ((uint8_t *) data + i) length:size_step_aligned options:MTLResourceStorageModeShared deallocator:nil];
if (ctx->buffers[ctx->n_buffers].metal == nil) {
- GGML_METAL_LOG_ERROR("%s: error: failed to allocate buffer, size = %8.2f MiB\n", __func__, size_step_aligned / 1024.0 / 1024.0);
+ GGML_LOG_ERROR("%s: error: failed to allocate buffer, size = %8.2f MiB\n", __func__, size_step_aligned / 1024.0 / 1024.0);
return false;
}
}
ggml_backend_metal_log_allocated_size(device, size_step_aligned);
if (i + size_step < size) {
- GGML_METAL_LOG_INFO("\n");
+ GGML_LOG_INFO("\n");
}
++ctx->n_buffers;
ctx->n_cb = MIN(n_cb, GGML_METAL_MAX_COMMAND_BUFFERS);
if (ctx->n_cb > 2) {
- GGML_METAL_LOG_WARN("%s: n_cb = %d, using n_cb > 2 is not recommended and can degrade the performance in some cases\n", __func__, n_cb);
+ GGML_LOG_WARN("%s: n_cb = %d, using n_cb > 2 is not recommended and can degrade the performance in some cases\n", __func__, n_cb);
}
}
/* .event_wait = */ NULL,
};
-void ggml_backend_metal_log_set_callback(ggml_log_callback log_callback, void * user_data) {
- ggml_metal_log_callback = log_callback;
- ggml_metal_log_user_data = user_data;
-}
-
static ggml_guid_t ggml_backend_metal_guid(void) {
static ggml_guid guid = { 0x81, 0xa1, 0x8b, 0x1e, 0x71, 0xec, 0x79, 0xed, 0x2b, 0x85, 0xdc, 0x8a, 0x61, 0x98, 0x30, 0xe6 };
return &guid;
ggml_backend_t ggml_backend_metal_init(void) {
struct ggml_backend_metal_context * ctx = ggml_metal_init();
if (ctx == NULL) {
- GGML_METAL_LOG_ERROR("%s: error: failed to allocate context\n", __func__);
+ GGML_LOG_ERROR("%s: error: failed to allocate context\n", __func__);
return NULL;
}
// logging
//
+struct ggml_logger_state {
+ ggml_log_callback log_callback;
+ void * log_callback_user_data;
+};
+static struct ggml_logger_state g_logger_state = {ggml_log_callback_default, NULL};
+
+static void ggml_log_internal_v(enum ggml_log_level level, const char * format, va_list args) {
+ if (format == NULL)
+ return;
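+    // vsnprintf consumes `args`, so keep a copy in case the 128-byte stack
+    // buffer is too small and a second formatting pass is needed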
+ va_list args_copy;
+ va_copy(args_copy, args);
+ char buffer[128];
+ int len = vsnprintf(buffer, 128, format, args);
+ if (len < 128) {
+ g_logger_state.log_callback(level, buffer, g_logger_state.log_callback_user_data);
+ } else {
+ char * buffer2 = (char *) calloc(len + 1, sizeof(char));
+ vsnprintf(buffer2, len + 1, format, args_copy);
+ buffer2[len] = 0;
+ g_logger_state.log_callback(level, buffer2, g_logger_state.log_callback_user_data);
+ free(buffer2);
+ }
+ va_end(args_copy);
+}
+
+void ggml_log_internal(enum ggml_log_level level, const char * format, ...) {
+ va_list args;
+ va_start(args, format);
+ ggml_log_internal_v(level, format, args);
+ va_end(args);
+}
+
+void ggml_log_callback_default(enum ggml_log_level level, const char * text, void * user_data) {
+ (void) level;
+ (void) user_data;
+ fputs(text, stderr);
+ fflush(stderr);
+}
+
#if (GGML_DEBUG >= 1)
-#define GGML_PRINT_DEBUG(...) printf(__VA_ARGS__)
+#define GGML_PRINT_DEBUG(...) GGML_LOG_DEBUG(__VA_ARGS__)
#else
#define GGML_PRINT_DEBUG(...)
#endif
#if (GGML_DEBUG >= 5)
-#define GGML_PRINT_DEBUG_5(...) printf(__VA_ARGS__)
+#define GGML_PRINT_DEBUG_5(...) GGML_LOG_DEBUG(__VA_ARGS__)
#else
#define GGML_PRINT_DEBUG_5(...)
#endif
#if (GGML_DEBUG >= 10)
-#define GGML_PRINT_DEBUG_10(...) printf(__VA_ARGS__)
+#define GGML_PRINT_DEBUG_10(...) GGML_LOG_DEBUG(__VA_ARGS__)
#else
#define GGML_PRINT_DEBUG_10(...)
#endif
-#define GGML_PRINT(...) printf(__VA_ARGS__)
-
//
// end of logging block
//
#else
inline static void * ggml_aligned_malloc(size_t size) {
if (size == 0) {
- GGML_PRINT("WARNING: Behavior may be unexpected when allocating 0 bytes for ggml_aligned_malloc!\n");
+ GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_aligned_malloc!\n");
return NULL;
}
void * aligned_memory = NULL;
error_desc = "insufficient memory";
break;
}
- GGML_PRINT("%s: %s (attempted to allocate %6.2f MB)\n", __func__, error_desc, size/(1024.0*1024.0));
+ GGML_LOG_ERROR("%s: %s (attempted to allocate %6.2f MB)\n", __func__, error_desc, size/(1024.0*1024.0));
GGML_ABORT("fatal error");
return NULL;
}
inline static void * ggml_malloc(size_t size) {
if (size == 0) {
- GGML_PRINT("WARNING: Behavior may be unexpected when allocating 0 bytes for ggml_malloc!\n");
+ GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_malloc!\n");
return NULL;
}
void * result = malloc(size);
if (result == NULL) {
- GGML_PRINT("%s: failed to allocate %6.2f MB\n", __func__, size/(1024.0*1024.0));
+ GGML_LOG_ERROR("%s: failed to allocate %6.2f MB\n", __func__, size/(1024.0*1024.0));
GGML_ABORT("fatal error");
}
return result;
// calloc
inline static void * ggml_calloc(size_t num, size_t size) {
if (num == 0 || size == 0) {
- GGML_PRINT("WARNING: Behavior may be unexpected when allocating 0 bytes for ggml_calloc!\n");
+ GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_calloc!\n");
return NULL;
}
void * result = calloc(num, size);
if (result == NULL) {
- GGML_PRINT("%s: failed to allocate %6.2f MB\n", __func__, size/(1024.0*1024.0));
+ GGML_LOG_ERROR("%s: failed to allocate %6.2f MB\n", __func__, size/(1024.0*1024.0));
GGML_ABORT("fatal error");
}
return result;
if (fptr != NULL) {
char buf[42];
if (fgets(buf, sizeof(buf), fptr) && strncmp(buf, "0\n", sizeof(buf)) != 0) {
- GGML_PRINT("WARNING: /proc/sys/kernel/numa_balancing is enabled, this has been observed to impair performance\n");
+ GGML_LOG_WARN("/proc/sys/kernel/numa_balancing is enabled, this has been observed to impair performance\n");
}
fclose(fptr);
}
////////////////////////////////////////////////////////////////////////////////
void ggml_print_object(const struct ggml_object * obj) {
- GGML_PRINT(" - ggml_object: type = %d, offset = %zu, size = %zu, next = %p\n",
+ GGML_LOG_INFO(" - ggml_object: type = %d, offset = %zu, size = %zu, next = %p\n",
obj->type, obj->offs, obj->size, (const void *) obj->next);
}
void ggml_print_objects(const struct ggml_context * ctx) {
struct ggml_object * obj = ctx->objects_begin;
- GGML_PRINT("%s: objects in context %p:\n", __func__, (const void *) ctx);
+ GGML_LOG_INFO("%s: objects in context %p:\n", __func__, (const void *) ctx);
while (obj != NULL) {
ggml_print_object(obj);
obj = obj->next;
}
- GGML_PRINT("%s: --- end ---\n", __func__);
+ GGML_LOG_INFO("%s: --- end ---\n", __func__);
}
int64_t ggml_nelements(const struct ggml_tensor * tensor) {
struct ggml_object * const obj_new = (struct ggml_object *)(mem_buffer + cur_end);
if (cur_end + size_needed + GGML_OBJECT_SIZE > ctx->mem_size) {
- GGML_PRINT("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
+ GGML_LOG_WARN("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
__func__, cur_end + size_needed + GGML_OBJECT_SIZE, ctx->mem_size);
assert(false);
return NULL;
if (ctx->scratch.data != NULL) {
// allocate tensor data in the scratch buffer
if (ctx->scratch.offs + data_size > ctx->scratch.size) {
- GGML_PRINT("%s: not enough space in the scratch memory pool (needed %zu, available %zu)\n",
+ GGML_LOG_WARN("%s: not enough space in the scratch memory pool (needed %zu, available %zu)\n",
__func__, ctx->scratch.offs + data_size, ctx->scratch.size);
assert(false);
return NULL;
}
#else
if (n_threads > threadpool->n_threads_max) {
- GGML_PRINT("WARNING: cplan requested more threads (%d) than available (%d)\n", n_threads, threadpool->n_threads_max);
+ GGML_LOG_WARN("cplan requested more threads (%d) than available (%d)\n", n_threads, threadpool->n_threads_max);
n_threads = threadpool->n_threads_max;
}
}
void ggml_graph_print(const struct ggml_cgraph * cgraph) {
- GGML_PRINT("=== GRAPH ===\n");
+ GGML_LOG_INFO("=== GRAPH ===\n");
- GGML_PRINT("n_nodes = %d\n", cgraph->n_nodes);
+ GGML_LOG_INFO("n_nodes = %d\n", cgraph->n_nodes);
for (int i = 0; i < cgraph->n_nodes; i++) {
struct ggml_tensor * node = cgraph->nodes[i];
- GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 ", %5" PRId64 "] %16s %s\n",
+ GGML_LOG_INFO(" - %3d: [ %5" PRId64 ", %5" PRId64 ", %5" PRId64 "] %16s %s\n",
i,
node->ne[0], node->ne[1], node->ne[2],
ggml_op_name(node->op), (node->flags & GGML_TENSOR_FLAG_PARAM) ? "x" : node->grad ? "g" : " ");
}
- GGML_PRINT("n_leafs = %d\n", cgraph->n_leafs);
+ GGML_LOG_INFO("n_leafs = %d\n", cgraph->n_leafs);
for (int i = 0; i < cgraph->n_leafs; i++) {
struct ggml_tensor * node = cgraph->leafs[i];
- GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 "] %8s %16s\n",
+ GGML_LOG_INFO(" - %3d: [ %5" PRId64 ", %5" PRId64 "] %8s %16s\n",
i,
node->ne[0], node->ne[1],
ggml_op_name(node->op),
ggml_get_name(node));
}
- GGML_PRINT("========================================\n");
+ GGML_LOG_INFO("========================================\n");
}
// check if node is part of the graph
fclose(fp);
- GGML_PRINT("%s: dot -Tpng %s -o %s.png && open %s.png\n", __func__, filename, filename, filename);
+ GGML_LOG_INFO("%s: dot -Tpng %s -o %s.png && open %s.png\n", __func__, filename, filename, filename);
}
////////////////////////////////////////////////////////////////////////////////
return 0;
#endif
}
+
+void ggml_log_set(ggml_log_callback log_callback, void * user_data) {
+ g_logger_state.log_callback = log_callback ? log_callback : ggml_log_callback_default;
+ g_logger_state.log_callback_user_data = user_data;
+}
////////////////////////////////////////////////////////////////////////////////
// globals
//
-struct llama_state {
- llama_state() {
- llama_log_set(log_callback, log_callback_user_data);
- }
-
- // We save the log callback globally
+struct llama_logger_state {
ggml_log_callback log_callback = llama_log_callback_default;
void * log_callback_user_data = nullptr;
};
-static llama_state g_state;
+static llama_logger_state g_logger_state;
// available llama models
enum e_model {
}
void llama_log_set(ggml_log_callback log_callback, void * user_data) {
- g_state.log_callback = log_callback ? log_callback : llama_log_callback_default;
- g_state.log_callback_user_data = user_data;
-
- ggml_backend_set_log_callback(log_callback, user_data);
-
-#ifdef GGML_USE_METAL
- ggml_backend_metal_log_set_callback(g_state.log_callback, g_state.log_callback_user_data);
-#elif defined(GGML_USE_CANN)
- ggml_backend_cann_log_set_callback(g_state.log_callback, g_state.log_callback_user_data);
-#endif
+ ggml_log_set(log_callback, user_data);
+ g_logger_state.log_callback = log_callback ? log_callback : llama_log_callback_default;
+ g_logger_state.log_callback_user_data = user_data;
}
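// Note: the per-backend setters removed in this change
// (ggml_backend_set_log_callback, ggml_backend_metal_log_set_callback,
// ggml_backend_cann_log_set_callback, ggml_backend_cuda_log_set_callback)
// are all subsumed by the single ggml_log_set() call above.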
static void llama_log_internal_v(ggml_log_level level, const char * format, va_list args) {
char buffer[128];
int len = vsnprintf(buffer, 128, format, args);
if (len < 128) {
- g_state.log_callback(level, buffer, g_state.log_callback_user_data);
+ g_logger_state.log_callback(level, buffer, g_logger_state.log_callback_user_data);
} else {
char * buffer2 = new char[len + 1];
vsnprintf(buffer2, len + 1, format, args_copy);
buffer2[len] = 0;
- g_state.log_callback(level, buffer2, g_state.log_callback_user_data);
+ g_logger_state.log_callback(level, buffer2, g_logger_state.log_callback_user_data);
delete[] buffer2;
}
va_end(args_copy);