vulkan: use memory budget extension to read memory usage (#15545)

author Gilad S. <redacted>

Mon, 1 Sep 2025 19:17:42 +0000 (22:17 +0300)

committer GitHub <redacted>

Mon, 1 Sep 2025 19:17:42 +0000 (21:17 +0200)
author Gilad S. <redacted>
Mon, 1 Sep 2025 19:17:42 +0000 (22:17 +0300)
committer GitHub <redacted>
Mon, 1 Sep 2025 19:17:42 +0000 (21:17 +0200)
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp

index 3ac459bc4f65f83577802a1a07a949788d286b5f..f7812ab37c8651bfef28d1087ddc89c93b62707c 100644 (file)
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -1370,6 +1370,7 @@ struct vk_instance_t {
      PFN_vkCmdInsertDebugUtilsLabelEXT  pfn_vkCmdInsertDebugUtilsLabelEXT  = {};
  
      std::vector<size_t> device_indices;
+    std::vector<bool>   device_supports_membudget; // one flag per device_indices entry: true when the device lists VK_EXT_memory_budget
      vk_device devices[GGML_VK_MAX_DEVICES];
  };
  
@@ -4340,15 +4341,16 @@ static void ggml_vk_instance_init() {
          vk_instance.pfn_vkCmdBeginDebugUtilsLabelEXT = (PFN_vkCmdBeginDebugUtilsLabelEXT) vkGetInstanceProcAddr(vk_instance.instance, "vkCmdBeginDebugUtilsLabelEXT");
          vk_instance.pfn_vkCmdEndDebugUtilsLabelEXT =   (PFN_vkCmdEndDebugUtilsLabelEXT) vkGetInstanceProcAddr(vk_instance.instance, "vkCmdEndDebugUtilsLabelEXT");
          vk_instance.pfn_vkCmdInsertDebugUtilsLabelEXT = (PFN_vkCmdInsertDebugUtilsLabelEXT) vkGetInstanceProcAddr(vk_instance.instance, "vkCmdInsertDebugUtilsLabelEXT");
-
      }
  
      vk_perf_logger_enabled = getenv("GGML_VK_PERF_LOGGER") != nullptr;
  
+    std::vector<vk::PhysicalDevice> devices = vk_instance.instance.enumeratePhysicalDevices(); // enumerated once; reused for the device count, the empty check and the per-device extension probe
+
      // Emulate behavior of CUDA_VISIBLE_DEVICES for Vulkan
      char * devices_env = getenv("GGML_VK_VISIBLE_DEVICES");
      if (devices_env != nullptr) {
-        size_t num_available_devices = vk_instance.instance.enumeratePhysicalDevices().size();
+        size_t num_available_devices = devices.size(); // still the vector here; the std::string named `devices` declared just below shadows it for the rest of this branch
  
          std::string devices(devices_env);
          std::replace(devices.begin(), devices.end(), ',', ' ');
@@ -4363,8 +4365,6 @@ static void ggml_vk_instance_init() {
              vk_instance.device_indices.push_back(tmp);
          }
      } else {
-        std::vector<vk::PhysicalDevice> devices = vk_instance.instance.enumeratePhysicalDevices();
-
          // If no vulkan devices are found, return early
          if (devices.empty()) {
              GGML_LOG_INFO("ggml_vulkan: No devices found.\n");
@@ -4469,6 +4469,19 @@ static void ggml_vk_instance_init() {
      GGML_LOG_DEBUG("ggml_vulkan: Found %zu Vulkan devices:\n", vk_instance.device_indices.size());
  
      for (size_t i = 0; i < vk_instance.device_indices.size(); i++) {
+        vk::PhysicalDevice vkdev = devices[vk_instance.device_indices[i]]; // device_indices[i] indexes the enumerated physical-device list
+        std::vector<vk::ExtensionProperties> extensionprops = vkdev.enumerateDeviceExtensionProperties();
+
+        bool membudget_supported = false; // set once VK_EXT_memory_budget is found among this device's extensions
+        for (const auto & ext : extensionprops) {
+            if (strcmp(VK_EXT_MEMORY_BUDGET_EXTENSION_NAME, ext.extensionName) == 0) {
+                membudget_supported = true;
+                break;
+            }
+        }
+
+        vk_instance.device_supports_membudget.push_back(membudget_supported); // appended in device_indices order, so entry i describes device i
+
          ggml_vk_print_gpu_info(i);
      }
  }
@@ -11654,15 +11667,29 @@ void ggml_backend_vk_get_device_description(int device, char * description, size
  
  void ggml_backend_vk_get_device_memory(int device, size_t * free, size_t * total) {
      GGML_ASSERT(device < (int) vk_instance.device_indices.size());
+    GGML_ASSERT(device < (int) vk_instance.device_supports_membudget.size());
  
      vk::PhysicalDevice vkdev = vk_instance.instance.enumeratePhysicalDevices()[vk_instance.device_indices[device]];
+    vk::PhysicalDeviceMemoryBudgetPropertiesEXT budgetprops;
+    vk::PhysicalDeviceMemoryProperties2 memprops = {};
+    bool membudget_supported = vk_instance.device_supports_membudget[device];
+
+    if (membudget_supported) {
+        memprops.pNext = &budgetprops;
+    }
+    vkdev.getMemoryProperties2(&memprops);
  
-    vk::PhysicalDeviceMemoryProperties memprops = vkdev.getMemoryProperties();
+    for (uint32_t i = 0; i < memprops.memoryProperties.memoryHeapCount; ++i) {
+        const vk::MemoryHeap & heap = memprops.memoryProperties.memoryHeaps[i];
  
-    for (const vk::MemoryHeap& heap : memprops.memoryHeaps) {
          if (heap.flags & vk::MemoryHeapFlagBits::eDeviceLocal) {
              *total = heap.size;
-            *free = heap.size;
+
+            if (membudget_supported && i < budgetprops.heapUsage.size()) {
+                *free = budgetprops.heapBudget[i] - budgetprops.heapUsage[i];
+            } else {
+                *free = heap.size;
+            }
              break;
          }
      }
author	Gilad S. <redacted>
	Mon, 1 Sep 2025 19:17:42 +0000 (22:17 +0300)
committer	GitHub <redacted>
	Mon, 1 Sep 2025 19:17:42 +0000 (21:17 +0200)