#define GGML_METAL_MAX_KERNELS 256
-struct ggml_metal_buffer {
- const char * name;
-
- void * data;
- size_t size;
-
- id<MTLBuffer> metal;
-};
-
struct ggml_metal_kernel {
id<MTLFunction> function;
id<MTLComputePipelineState> pipeline;
dispatch_queue_t d_queue;
- int n_buffers;
- struct ggml_metal_buffer buffers[GGML_METAL_MAX_BUFFERS];
-
struct ggml_metal_kernel kernels[GGML_METAL_MAX_KERNELS];
bool support_simdgroup_reduction;
// Show all the Metal device instances in the system
NSArray * devices = MTLCopyAllDevices();
for (id<MTLDevice> device in devices) {
- NSString * s = [device name];
- GGML_METAL_LOG_INFO("%s: found device: %s\n", __func__, [s UTF8String]);
+ GGML_METAL_LOG_INFO("%s: found device: %s\n", __func__, [[device name] UTF8String]);
}
[devices release]; // since it was created by a *Copy* C method
#endif
// Pick and show default Metal device
id<MTLDevice> device = MTLCreateSystemDefaultDevice();
- NSString * s = [device name];
- GGML_METAL_LOG_INFO("%s: picking default device: %s\n", __func__, [s UTF8String]);
+ GGML_METAL_LOG_INFO("%s: picking default device: %s\n", __func__, [[device name] UTF8String]);
// Configure context
struct ggml_metal_context * ctx = malloc(sizeof(struct ggml_metal_context));
ctx->device = device;
ctx->n_cb = MIN(n_cb, GGML_METAL_MAX_BUFFERS);
ctx->queue = [ctx->device newCommandQueue];
- ctx->n_buffers = 0;
-
ctx->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT);
// load library
static void ggml_metal_free(struct ggml_metal_context * ctx) {
GGML_METAL_LOG_INFO("%s: deallocating\n", __func__);
- for (int i = 0; i < ctx->n_buffers; ++i) {
- [ctx->buffers[i].metal release];
- }
-
for (int i = 0; i < GGML_METAL_MAX_KERNELS; ++i) {
if (ctx->kernels[i].pipeline) {
[ctx->kernels[i].pipeline release];
// the assumption is that there is 1-to-1 mapping between the host and device memory buffers, so we can find the
// Metal buffer based on the host memory pointer
//
-static id<MTLBuffer> ggml_metal_get_buffer(struct ggml_metal_context * ctx, struct ggml_tensor * t, size_t * offs) {
+static id<MTLBuffer> ggml_metal_get_buffer(struct ggml_tensor * t, size_t * offs) {
//GGML_METAL_LOG_INFO("%s: data tensor '%16s', offs_data = %8ld, offs_eval = %8ld, offs_cach = %8ld\n", __func__, t->name, offs_data, offs_eval, offs_cach);
const int64_t tsize = ggml_nbytes(t);
ggml_backend_buffer_t buffer = t->view_src ? t->view_src->buffer : t->buffer;
- // compatibility with ggml-backend
- if (buffer && buffer->buft == ggml_backend_metal_buffer_type()) {
- struct ggml_backend_metal_buffer_context * buf_ctx = (struct ggml_backend_metal_buffer_context *) buffer->context;
-
- // find the view that contains the tensor fully
- for (int i = 0; i < buf_ctx->n_buffers; ++i) {
- const int64_t ioffs = (int64_t) t->data - (int64_t) buf_ctx->buffers[i].data;
-
- //GGML_METAL_LOG_INFO("ioffs = %10ld, tsize = %10ld, sum = %10ld, buf_ctx->buffers[%d].size = %10ld\n", ioffs, tsize, ioffs + tsize, i, buf_ctx->buffers[i].size);
- if (ioffs >= 0 && ioffs + tsize <= (int64_t) buf_ctx->buffers[i].size) {
- *offs = (size_t) ioffs;
-
- //GGML_METAL_LOG_INFO("%s: tensor '%16s', offs = %8ld\n", __func__, t->name, *offs);
-
- return buf_ctx->buffers[i].metal;
- }
- }
-
- GGML_METAL_LOG_ERROR("%s: error: tensor '%s' buffer is nil\n", __func__, t->name);
-
- return nil;
- }
+ struct ggml_backend_metal_buffer_context * buf_ctx = (struct ggml_backend_metal_buffer_context *) buffer->context;
// find the view that contains the tensor fully
- for (int i = 0; i < ctx->n_buffers; ++i) {
- const int64_t ioffs = (int64_t) t->data - (int64_t) ctx->buffers[i].data;
+ for (int i = 0; i < buf_ctx->n_buffers; ++i) {
+ const int64_t ioffs = (int64_t) t->data - (int64_t) buf_ctx->buffers[i].data;
- //GGML_METAL_LOG_INFO("ioffs = %10ld, tsize = %10ld, sum = %10ld, ctx->buffers[%d].size = %10ld, name = %s\n", ioffs, tsize, ioffs + tsize, i, ctx->buffers[i].size, ctx->buffers[i].name);
- if (ioffs >= 0 && ioffs + tsize <= (int64_t) ctx->buffers[i].size) {
+ //GGML_METAL_LOG_INFO("ioffs = %10ld, tsize = %10ld, sum = %10ld, buf_ctx->buffers[%d].size = %10ld\n", ioffs, tsize, ioffs + tsize, i, buf_ctx->buffers[i].size);
+ if (ioffs >= 0 && ioffs + tsize <= (int64_t) buf_ctx->buffers[i].size) {
*offs = (size_t) ioffs;
- //GGML_METAL_LOG_INFO("%s: '%s' tensor '%16s', offs = %8ld\n", __func__, ctx->buffers[i].name, t->name, *offs);
+ //GGML_METAL_LOG_INFO("%s: tensor '%16s', offs = %8ld\n", __func__, t->name, *offs);
- return ctx->buffers[i].metal;
+ return buf_ctx->buffers[i].metal;
}
}
- GGML_METAL_LOG_ERROR("%s: error: buffer is nil\n", __func__);
+ GGML_METAL_LOG_ERROR("%s: error: tensor '%s' buffer is nil\n", __func__, t->name);
return nil;
}
const enum ggml_type src1t = src1 ? src1->type : GGML_TYPE_COUNT;
const enum ggml_type dstt = dst ? dst->type : GGML_TYPE_COUNT;
- id<MTLBuffer> id_src0 = src0 ? ggml_metal_get_buffer(ctx, src0, &offs_src0) : nil;
- id<MTLBuffer> id_src1 = src1 ? ggml_metal_get_buffer(ctx, src1, &offs_src1) : nil;
- id<MTLBuffer> id_dst = dst ? ggml_metal_get_buffer(ctx, dst, &offs_dst) : nil;
+ id<MTLBuffer> id_src0 = src0 ? ggml_metal_get_buffer(src0, &offs_src0) : nil;
+ id<MTLBuffer> id_src1 = src1 ? ggml_metal_get_buffer(src1, &offs_src1) : nil;
+ id<MTLBuffer> id_dst = dst ? ggml_metal_get_buffer(dst, &offs_dst) : nil;
//GGML_METAL_LOG_INFO("%s: op - %s\n", __func__, ggml_op_name(dst->op));
//if (src0) {
struct ggml_tensor * src_cur = dst->src[2 + (j % n_as)];
size_t offs_src_cur = 0;
- id<MTLBuffer> id_src_cur = ggml_metal_get_buffer(ctx, src_cur, &offs_src_cur);
+ id<MTLBuffer> id_src_cur = ggml_metal_get_buffer(src_cur, &offs_src_cur);
[encoder setBuffer:id_src_cur offset:offs_src_cur atIndex:19 + j];
}
struct ggml_tensor * src_cur = dst->src[2 + (j % n_as)];
size_t offs_src_cur = 0;
- id<MTLBuffer> id_src_cur = ggml_metal_get_buffer(ctx, src_cur, &offs_src_cur);
+ id<MTLBuffer> id_src_cur = ggml_metal_get_buffer(src_cur, &offs_src_cur);
[encoder setBuffer:id_src_cur offset:offs_src_cur atIndex:23 + j];
}