}
}
+static std::vector<uint32_t> ggml_vk_find_memory_properties(const vk::PhysicalDeviceMemoryProperties* mem_props, vk::MemoryRequirements* mem_req, vk::MemoryPropertyFlags flags) {
+ std::vector<uint32_t> indices;
-static uint32_t find_properties(const vk::PhysicalDeviceMemoryProperties* mem_props, vk::MemoryRequirements* mem_req, vk::MemoryPropertyFlags flags) {
for (uint32_t i = 0; i < mem_props->memoryTypeCount; ++i) {
vk::MemoryType memory_type = mem_props->memoryTypes[i];
if ((mem_req->memoryTypeBits & ((uint64_t)1 << i)) &&
(flags & memory_type.propertyFlags) == flags &&
mem_props->memoryHeaps[memory_type.heapIndex].size >= mem_req->size) {
- return static_cast<int32_t>(i);
+ indices.push_back(i);
}
}
- return UINT32_MAX;
+ return indices;
}
static vk_buffer ggml_vk_create_buffer(vk_device& device, size_t size, const std::initializer_list<vk::MemoryPropertyFlags> & req_flags_list) {
for (auto it = req_flags_list.begin(); it != req_flags_list.end(); it++) {
const auto & req_flags = *it;
- uint32_t memory_type_index = find_properties(&mem_props, &mem_req, req_flags);
+ const std::vector<uint32_t> memory_type_indices = ggml_vk_find_memory_properties(&mem_props, &mem_req, req_flags);
- if (memory_type_index == UINT32_MAX) {
+ if (memory_type_indices.empty()) {
continue;
}
buf->memory_property_flags = req_flags;
- try {
- buf->device_memory = device->device.allocateMemory({ mem_req.size, memory_type_index, &mem_flags_info });
- break;
- } catch (const vk::SystemError& e) {
- // loop and retry
- // during last attempt throw the exception
- if (it + 1 == req_flags_list.end()) {
- device->device.destroyBuffer(buf->buffer);
- throw e;
+ for (auto mtype_it = memory_type_indices.begin(); mtype_it != memory_type_indices.end(); mtype_it++) {
+ try {
+ buf->device_memory = device->device.allocateMemory({ mem_req.size, *mtype_it, &mem_flags_info });
+ break;
+ } catch (const vk::SystemError& e) {
+ // loop and retry
+ // during last attempt throw the exception
+ if (it + 1 == req_flags_list.end() && mtype_it + 1 == memory_type_indices.end()) {
+ device->device.destroyBuffer(buf->buffer);
+ throw e;
+ }
}
}
}
vk::PhysicalDevice vkdev = vk_instance.instance.enumeratePhysicalDevices()[vk_instance.device_indices[device]];
vk::PhysicalDeviceMemoryBudgetPropertiesEXT budgetprops;
vk::PhysicalDeviceMemoryProperties2 memprops = {};
- bool membudget_supported = vk_instance.device_supports_membudget[device];
+ const bool membudget_supported = vk_instance.device_supports_membudget[device];
+ const bool is_integrated_gpu = vkdev.getProperties().deviceType == vk::PhysicalDeviceType::eIntegratedGpu;
if (membudget_supported) {
memprops.pNext = &budgetprops;
}
vkdev.getMemoryProperties2(&memprops);
+ *total = 0;
+ *free = 0;
+
for (uint32_t i = 0; i < memprops.memoryProperties.memoryHeapCount; ++i) {
const vk::MemoryHeap & heap = memprops.memoryProperties.memoryHeaps[i];
- if (heap.flags & vk::MemoryHeapFlagBits::eDeviceLocal) {
- *total = heap.size;
+ if (is_integrated_gpu || (heap.flags & vk::MemoryHeapFlagBits::eDeviceLocal)) {
+ *total += heap.size;
if (membudget_supported && i < budgetprops.heapUsage.size()) {
- *free = budgetprops.heapBudget[i] - budgetprops.heapUsage[i];
+ *free += budgetprops.heapBudget[i] - budgetprops.heapUsage[i];
} else {
- *free = heap.size;
+ *free += heap.size;
}
- break;
}
}
}