int mtl_device_ref_count;
id<MTLLibrary> mtl_library;
+ NSLock * mtl_lock;
+
bool has_simdgroup_reduction;
bool has_simdgroup_mm;
bool has_residency_sets;
bool has_bfloat;
bool use_bfloat;
+ size_t max_size;
+
char name[128];
} g_ggml_ctx_dev_main = {
/*.mtl_device =*/ nil,
/*.mtl_device_ref_count =*/ 0,
/*.mtl_library =*/ nil,
+ /*.mtl_lock =*/ nil,
/*.has_simdgroup_reduction =*/ false,
/*.has_simdgroup_mm =*/ false,
/*.has_residency_sets =*/ false,
/*.has_bfloat =*/ false,
/*.use_bfloat =*/ false,
+ /*.max_size =*/ 0,
/*.name =*/ "",
};
static id<MTLDevice> ggml_backend_metal_device_acq(struct ggml_backend_metal_device_context * ctx) {
assert(ctx != NULL);
+ if (ctx->mtl_lock == nil) {
+ ctx->mtl_lock = [[NSLock alloc] init];
+ }
+
if (ctx->mtl_device == nil) {
ctx->mtl_device = MTLCreateSystemDefaultDevice();
}
ctx->use_bfloat = false;
#endif
+ ctx->max_size = ctx->mtl_device.maxBufferLength;
+
strncpy(ctx->name, [[ctx->mtl_device name] UTF8String], sizeof(ctx->name) - 1);
}
ctx->mtl_device_ref_count--;
if (ctx->mtl_device_ref_count == 0) {
+ if (ctx->mtl_lock) {
+ [ctx->mtl_lock release];
+ ctx->mtl_lock = nil;
+ }
+
if (ctx->mtl_library) {
[ctx->mtl_library release];
ctx->mtl_library = nil;
struct ggml_backend_metal_context * ctx = calloc(1, sizeof(struct ggml_backend_metal_context));
struct ggml_backend_metal_device_context * ctx_dev = dev->context;
- id<MTLDevice> device = ggml_backend_metal_device_acq(ctx_dev);
+ id<MTLDevice> device = ctx_dev->mtl_device;
GGML_LOG_INFO("%s: picking default device: %s\n", __func__, [[device name] UTF8String]);
ctx->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT);
// load library
- if (ctx_dev->mtl_library == nil) {
- ctx_dev->mtl_library = ggml_metal_load_library(device, ctx_dev->use_bfloat);
+ {
+ [ctx_dev->mtl_lock lock];
+
+ if (ctx_dev->mtl_library == nil) {
+ ctx_dev->mtl_library = ggml_metal_load_library(device, ctx_dev->use_bfloat);
+ }
+
+ [ctx_dev->mtl_lock unlock];
}
+
id<MTLLibrary> metal_library = ctx_dev->mtl_library;
if (metal_library == nil) {
GGML_LOG_ERROR("%s: error: metal library is nil\n", __func__);
}
ggml_backend_metal_buffer_rset_free(ctx);
- ggml_backend_metal_device_rel(buffer->buft->device->context);
if (ctx->owned) {
#if TARGET_OS_OSX
}
struct ggml_backend_metal_device_context * ctx_dev = (struct ggml_backend_metal_device_context *)buft->device->context;
- id<MTLDevice> device = ggml_backend_metal_device_acq(ctx_dev);
+
+ GGML_ASSERT(ctx_dev->mtl_device != nil);
+
+ id<MTLDevice> device = ctx_dev->mtl_device;
ctx->all_data = ggml_metal_host_malloc(size_aligned);
ctx->all_size = size_aligned;
if (size_aligned > 0 && (ctx->all_data == NULL || ctx->buffers[0].metal == nil)) {
GGML_LOG_ERROR("%s: error: failed to allocate buffer, size = %8.2f MiB\n", __func__, size_aligned / 1024.0 / 1024.0);
free(ctx);
- ggml_backend_metal_device_rel(ctx_dev);
return NULL;
}
if (!ggml_backend_metal_buffer_rset_init(ctx, ctx_dev, device)) {
GGML_LOG_ERROR("%s: error: failed to initialize residency set\n", __func__);
free(ctx);
- ggml_backend_metal_device_rel(ctx_dev);
return NULL;
}
// Buffer-type callback: reports the alignment used for buffers of this buffer type.
// Always returns the constant 32 (presumably bytes - confirm); `buft` is not consulted.
static size_t ggml_backend_metal_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) {
return 32;
+
// placed after the return, so it is never reached at runtime;
// presumably only marks `buft` as used for the compiler - confirm against the macro definition
GGML_UNUSED(buft);
}
// Buffer-type callback: reports the maximum buffer size for this buffer type.
// Removed (`-`) lines: acquired the Metal device, read `maxBufferLength`, then released the device on every call.
// Added (`+`) line: reads the `max_size` field stored in the device context instead
// (that field is assigned from `mtl_device.maxBufferLength` in the device-acquire code).
static size_t ggml_backend_metal_buffer_type_get_max_size(ggml_backend_buffer_type_t buft) {
- id<MTLDevice> device = ggml_backend_metal_device_acq(buft->device->context);
- const size_t max_size = device.maxBufferLength;
- ggml_backend_metal_device_rel(buft->device->context);
// buft->device->context is cast to the Metal device context type before reading the field
// NOTE(review): no nil/initialization check here - max_size stays 0 until the device has been acquired
+ const size_t max_size = ((struct ggml_backend_metal_device_context *)buft->device->context)->max_size;
return max_size;
-
- GGML_UNUSED(buft);
}
static bool ggml_backend_metal_buffer_type_is_host(ggml_backend_buffer_type_t buft) {
}
struct ggml_backend_metal_device_context * ctx_dev = &g_ggml_ctx_dev_main;
- id<MTLDevice> device = ggml_backend_metal_device_acq(ctx_dev);
+
+ GGML_ASSERT(ctx_dev->mtl_device != nil);
+
+ id<MTLDevice> device = ctx_dev->mtl_device;
// the buffer fits into the max buffer size allowed by the device
if (size_aligned <= device.maxBufferLength) {
if (!ggml_backend_metal_buffer_rset_init(ctx, ctx_dev, device)) {
GGML_LOG_ERROR("%s: error: failed to initialize residency set\n", __func__);
free(ctx);
- ggml_backend_metal_device_rel(ctx_dev);
return NULL;
}
}
static void ggml_backend_metal_free(ggml_backend_t backend) {
- struct ggml_backend_metal_context * ctx = backend->context;
- struct ggml_backend_metal_device_context * ctx_dev = backend->device->context;
+ struct ggml_backend_metal_context * ctx = backend->context;
- ggml_backend_metal_device_rel(ctx_dev);
ggml_metal_free(ctx);
free(backend);
struct ggml_backend_metal_device_context * ctx_dev = backend->device->context;
+ GGML_ASSERT(ctx_dev->mtl_device != nil);
+
return [ctx_dev->mtl_device supportsFamily:(MTLGPUFamilyApple1 + family - 1)];
}
}
// Device callback: returns the `name` string stored in the Metal device context of `dev`.
// The returned pointer refers to storage inside the context itself (no copy is made).
// Removed (`-`) lines: an acquire/release round-trip that existed only to populate `name`.
// NOTE(review): after this change `name` is presumably filled by an earlier device acquire
// (the acquire code copies the device name into ctx->name) - confirm it always runs first.
static const char * ggml_backend_metal_device_get_description(ggml_backend_dev_t dev) {
- // acq/rel just to populate ctx->name in case it hasn't been done yet
struct ggml_backend_metal_device_context * ctx_dev = (struct ggml_backend_metal_device_context *)dev->context;
- ggml_backend_metal_device_acq(ctx_dev);
- ggml_backend_metal_device_rel(ctx_dev);
return ctx_dev->name;
}
static void ggml_backend_metal_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
if (@available(macOS 10.12, iOS 16.0, *)) {
struct ggml_backend_metal_device_context * ctx_dev = (struct ggml_backend_metal_device_context *)dev->context;
- id<MTLDevice> device = ggml_backend_metal_device_acq(ctx_dev);
+ id<MTLDevice> device = ctx_dev->mtl_device;
*total = device.recommendedMaxWorkingSetSize;
*free = *total - device.currentAllocatedSize;
-
- ggml_backend_metal_device_rel(ctx_dev);
} else {
*free = 1;
*total = 1;
}
struct ggml_backend_metal_device_context * ctx_dev = (struct ggml_backend_metal_device_context *)dev->context;
- id<MTLDevice> device = ggml_backend_metal_device_acq(ctx_dev);
+
+ GGML_ASSERT(ctx_dev->mtl_device != nil);
+
+ id<MTLDevice> device = ctx_dev->mtl_device;
// the buffer fits into the max buffer size allowed by the device
if (size_aligned <= device.maxBufferLength) {
if (!ggml_backend_metal_buffer_rset_init(ctx, ctx_dev, device)) {
GGML_LOG_ERROR("%s: error: failed to initialize residency set\n", __func__);
free(ctx);
- ggml_backend_metal_device_rel(ctx_dev);
return NULL;
}
}
// Device callback: reports whether `buft` is one of the two Metal buffer types.
// A buffer type is recognized by comparing its `iface.get_name` function pointer against the
// regular Metal buffer type's and the from-ptr Metal buffer type's get_name implementations.
// `dev` is not consulted. The `-`/`+` lines differ only in line layout; the expression is the same.
static bool ggml_backend_metal_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
- return buft->iface.get_name == ggml_backend_metal_buffer_type_get_name ||
- buft->iface.get_name == ggml_backend_metal_buffer_from_ptr_type_get_name;
+ return
+ buft->iface.get_name == ggml_backend_metal_buffer_type_get_name ||
+ buft->iface.get_name == ggml_backend_metal_buffer_from_ptr_type_get_name;
// follows the return, so never reached at runtime; presumably only marks `dev` as used - confirm
GGML_UNUSED(dev);
}
/* .get_proc_address = */ ggml_backend_metal_get_proc_address,
};
+// called upon program exit (registered via atexit in ggml_backend_metal_reg); releases the reference on the main device context that ggml_backend_metal_reg acquires
+static void ggml_metal_cleanup(void) {
+ ggml_backend_metal_device_rel(&g_ggml_ctx_dev_main);
+}
+
+// TODO: make thread-safe
ggml_backend_reg_t ggml_backend_metal_reg(void) {
- // TODO: make this thread-safe somehow?
+ ggml_backend_metal_device_acq(&g_ggml_ctx_dev_main);
+
+ // register cleanup callback
+ // TODO: not ideal, but not sure if there is a better way to do this in Objective-C
+ atexit(ggml_metal_cleanup);
+
{
g_ggml_backend_metal_reg = (struct ggml_backend_reg) {
/* .api_version = */ GGML_BACKEND_API_VERSION,