Vulkan: Set device max size for host memory to avoid OOM warning and fallback to CPU buffer

author 0cc4m <redacted>

Thu, 19 Jun 2025 07:15:42 +0000 (09:15 +0200)

committer GitHub <redacted>

Thu, 19 Jun 2025 07:15:42 +0000 (09:15 +0200)
author 0cc4m <redacted>
Thu, 19 Jun 2025 07:15:42 +0000 (09:15 +0200)
committer GitHub <redacted>
Thu, 19 Jun 2025 07:15:42 +0000 (09:15 +0200)
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp

index 8d62303aabd7ff4c90926e9aea683704b25f0931..1375bfeb9dc50f26e36fc4b05ec467b0d09ed251 100644 (file)
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -9495,6 +9495,12 @@ static size_t ggml_backend_vk_host_buffer_type_get_alignment(ggml_backend_buffer
      UNUSED(buft);
  }
  
+static size_t ggml_backend_vk_host_buffer_type_get_max_size(ggml_backend_buffer_type_t buft) {
+    return vk_instance.devices[0]->suballocation_block_size; // reported max size of a host buffer: device 0's suballocation block size (always device 0, buft is not consulted)
+    // Unreachable after the return above; UNUSED() presumably only marks buft as deliberately unused -- TODO confirm macro definition.
+    UNUSED(buft);
+}
+
  // Should be changed to return device-specific host buffer type
  // but that probably requires changes in llama.cpp
  ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type() {
@@ -9503,7 +9509,7 @@ ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type() {
              /* .get_name         = */ ggml_backend_vk_host_buffer_type_name,
              /* .alloc_buffer     = */ ggml_backend_vk_host_buffer_type_alloc_buffer,
              /* .get_alignment    = */ ggml_backend_vk_host_buffer_type_get_alignment,
-            /* .get_max_size     = */ NULL, // defaults to SIZE_MAX
+            /* .get_max_size     = */ ggml_backend_vk_host_buffer_type_get_max_size,
              /* .get_alloc_size   = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
              /* .is_host          = */ ggml_backend_cpu_buffer_type()->iface.is_host,
          },
author	0cc4m <redacted>
	Thu, 19 Jun 2025 07:15:42 +0000 (09:15 +0200)
committer	GitHub <redacted>
	Thu, 19 Jun 2025 07:15:42 +0000 (09:15 +0200)