git.djapps.eu Git - pkg/ggml/sources/whisper.cpp/commitdiff
ggml : add max buffer sizes to opencl and metal backends (llama/5181)
author slaren <redacted>
Mon, 29 Jan 2024 08:05:13 +0000 (09:05 +0100)
committer Georgi Gerganov <redacted>
Tue, 30 Jan 2024 19:27:59 +0000 (21:27 +0200)
ggml-metal.m
ggml-opencl.cpp

index 98f0db6f42c192108a79ccdd1b48cb83eb144da4..c095be3e34ff77ce4938043215f240c3689e5128 100644 (file)
@@ -2398,6 +2398,16 @@ GGML_CALL static size_t ggml_backend_metal_buffer_type_get_alignment(ggml_backen
     UNUSED(buft);
 }
 
+// max size of a single buffer of this type: the Metal device's maxBufferLength
+GGML_CALL static size_t ggml_backend_metal_buffer_type_get_max_size(ggml_backend_buffer_type_t buft) {
+    id<MTLDevice> mtl_device = ggml_backend_metal_get_device();
+    const size_t max_buffer_length = mtl_device.maxBufferLength;
+    ggml_backend_metal_free_device(); // the device is only needed for the query above
+
+    return max_buffer_length;
+    UNUSED(buft);
+}
+
 GGML_CALL static bool ggml_backend_metal_buffer_type_supports_backend(ggml_backend_buffer_type_t buft, ggml_backend_t backend) {
     return ggml_backend_is_metal(backend) || ggml_backend_is_cpu(backend);
 
@@ -2416,7 +2426,7 @@ GGML_CALL ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void) {
             /* .get_name         = */ ggml_backend_metal_buffer_type_get_name,
             /* .alloc_buffer     = */ ggml_backend_metal_buffer_type_alloc_buffer,
             /* .get_alignment    = */ ggml_backend_metal_buffer_type_get_alignment,
-            /* .get_max_size     = */ NULL, // TODO: return device.maxBufferLength
+            /* .get_max_size     = */ ggml_backend_metal_buffer_type_get_max_size,
             /* .get_alloc_size   = */ NULL, // defaults to ggml_nbytes
             /* .supports_backend = */ ggml_backend_metal_buffer_type_supports_backend,
             /* .is_host          = */ ggml_backend_metal_buffer_type_is_host,
index d40663535f13d05e8f5e88497af4be080bbc84e0..797bee66799b5e3b50f9a486e1a2653cc420e80d 100644 (file)
@@ -2125,6 +2125,15 @@ static size_t ggml_backend_opencl_buffer_type_get_alignment(ggml_backend_buffer_
     GGML_UNUSED(buffer_type);
 }
 
+// Returns the largest single allocation the OpenCL device reports, in bytes
+// (CL_DEVICE_MAX_MEM_ALLOC_SIZE). The device is queried on the first call and the
+// result is cached in a function-local static. If the query fails, (size_t)-1 is
+// returned and the query is attempted again on the next call.
+static size_t ggml_backend_opencl_buffer_type_get_max_size(ggml_backend_buffer_type_t buffer_type) {
+    static size_t max_size = (size_t)-1;
+    static bool   queried  = false; // separate flag: (size_t)-1 is also a valid (clamped) result
+
+    if (!queried) {
+        ggml_cl_init();
+
+        // CL_DEVICE_MAX_MEM_ALLOC_SIZE is a cl_ulong: asking for it with sizeof(size_t)
+        // fails with CL_INVALID_VALUE wherever size_t is narrower than cl_ulong
+        cl_ulong max_alloc = 0;
+        if (clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(max_alloc), &max_alloc, NULL) == CL_SUCCESS) {
+            // clamp in case the reported limit does not fit in size_t
+            max_size = max_alloc < (cl_ulong)(size_t)-1 ? (size_t) max_alloc : (size_t)-1;
+            queried  = true;
+        }
+    }
+
+    return max_size;
+
+    GGML_UNUSED(buffer_type);
+}
+
 static bool ggml_backend_opencl_buffer_type_supports_backend(ggml_backend_buffer_type_t buffer_type, ggml_backend_t backend) {
     //return ggml_backend_is_opencl(backend); // opencl must be used through the cpu backend
     return ggml_backend_is_cpu(backend);
@@ -2136,7 +2145,7 @@ static ggml_backend_buffer_type_i ggml_backend_opencl_buffer_type_interface = {
     /* .get_name         = */ ggml_backend_opencl_buffer_type_name,
     /* .alloc_buffer     = */ ggml_backend_opencl_buffer_type_alloc_buffer,
     /* .get_alignment    = */ ggml_backend_opencl_buffer_type_get_alignment,
-    /* .get_max_size     = */ NULL, // TODO: return from device info
+    /* .get_max_size     = */ ggml_backend_opencl_buffer_type_get_max_size,
     /* .get_alloc_size   = */ NULL,
     /* .supports_backend = */ ggml_backend_opencl_buffer_type_supports_backend,
     /* .is_host          = */ NULL,