q.cmd_buffer_idx = 0;
}
-static vk_buffer ggml_vk_create_buffer(ggml_backend_vk_context * ctx, size_t size, vk::MemoryPropertyFlags req_flags) {
+static uint32_t find_properties(const vk::PhysicalDeviceMemoryProperties* mem_props, vk::MemoryRequirements* mem_req, vk::MemoryPropertyFlags flags) {
+ for (uint32_t i = 0; i < mem_props->memoryTypeCount; ++i) {
+ vk::MemoryType memory_type = mem_props->memoryTypes[i];
+ if ((mem_req->memoryTypeBits & ((uint64_t)1 << i)) &&
+ (flags & memory_type.propertyFlags) == flags &&
+ mem_props->memoryHeaps[memory_type.heapIndex].size >= mem_req->size) {
+ return static_cast<int32_t>(i);
+ }
+ }
+ return UINT32_MAX;
+}
+
+static vk_buffer ggml_vk_create_buffer(ggml_backend_vk_context * ctx, size_t size, vk::MemoryPropertyFlags req_flags, vk::MemoryPropertyFlags fallback_flags = vk::MemoryPropertyFlags(0)) {
#ifdef GGML_VULKAN_DEBUG
- std::cerr << "ggml_vk_create_buffer(" << size << ", " << to_string(req_flags) << ")" << std::endl;
+ std::cerr << "ggml_vk_create_buffer(" << size << ", " << to_string(req_flags) << ", " << to_string(fallback_flags) << ")" << std::endl;
#endif
vk_buffer buf = std::make_shared<vk_buffer_struct>();
uint32_t memory_type_index = UINT32_MAX;
- for (uint32_t i = 0; i < mem_props.memoryTypeCount; ++i) {
- vk::MemoryType memory_type = mem_props.memoryTypes[i];
- if ((mem_req.memoryTypeBits & ((uint64_t)1 << i)) && (req_flags & memory_type.propertyFlags) == req_flags && mem_props.memoryHeaps[memory_type.heapIndex].size >= mem_req.size) {
- memory_type_index = i;
- break;
- }
+ memory_type_index = find_properties(&mem_props, &mem_req, req_flags);
+ buf->memory_property_flags = req_flags;
+
+ if (memory_type_index == UINT32_MAX && fallback_flags) {
+ memory_type_index = find_properties(&mem_props, &mem_req, fallback_flags);
+ buf->memory_property_flags = fallback_flags;
}
- if (memory_type_index >= mem_props.memoryTypeCount) {
+ if (memory_type_index == UINT32_MAX) {
ctx->device.lock()->device.destroyBuffer(buf->buffer);
buf->size = 0;
throw vk::OutOfDeviceMemoryError("No suitable memory type found");
buf->size = 0;
throw e;
}
- buf->memory_property_flags = req_flags;
buf->ptr = nullptr;
- if (req_flags & vk::MemoryPropertyFlagBits::eHostVisible) {
+ if (buf->memory_property_flags & vk::MemoryPropertyFlagBits::eHostVisible) {
buf->ptr = ctx->device.lock()->device.mapMemory(buf->device_memory, 0, VK_WHOLE_SIZE);
}
return buf;
}
-static vk_buffer ggml_vk_create_buffer_check(ggml_backend_vk_context * ctx, size_t size, vk::MemoryPropertyFlags req_flags) {
+static vk_buffer ggml_vk_create_buffer_check(ggml_backend_vk_context * ctx, size_t size, vk::MemoryPropertyFlags req_flags, vk::MemoryPropertyFlags fallback_flags = vk::MemoryPropertyFlags(0)) {
try {
- return ggml_vk_create_buffer(ctx, size, req_flags);
+ return ggml_vk_create_buffer(ctx, size, req_flags, fallback_flags);
} catch (const vk::SystemError& e) {
std::cerr << "ggml_vulkan: Memory allocation of size " << size << " failed." << std::endl;
std::cerr << "ggml_vulkan: " << e.what() << std::endl;
static vk_buffer ggml_vk_create_buffer_device(ggml_backend_vk_context * ctx, size_t size) {
vk_buffer buf;
try {
- buf = ggml_vk_create_buffer(ctx, size, vk::MemoryPropertyFlagBits::eDeviceLocal);
- } catch (const vk::SystemError& e) {
if (ctx->device.lock()->uma) {
// Fall back to host memory type
- buf = ggml_vk_create_buffer_check(ctx, size, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent);
+ buf = ggml_vk_create_buffer(ctx, size, vk::MemoryPropertyFlagBits::eDeviceLocal, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent);
} else {
- std::cerr << "ggml_vulkan: Device memory allocation of size " << size << " failed." << std::endl;
- std::cerr << "ggml_vulkan: " << e.what() << std::endl;
- throw e;
+ buf = ggml_vk_create_buffer(ctx, size, vk::MemoryPropertyFlagBits::eDeviceLocal);
}
+ } catch (const vk::SystemError& e) {
+ std::cerr << "ggml_vulkan: Device memory allocation of size " << size << " failed." << std::endl;
+ std::cerr << "ggml_vulkan: " << e.what() << std::endl;
+ throw e;
}
return buf;
#ifdef GGML_VULKAN_DEBUG
std::cerr << "ggml_vk_host_malloc(" << size << ")" << std::endl;
#endif
- vk_buffer buf = ggml_vk_create_buffer(ctx, size, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached);
+ vk_buffer buf = ggml_vk_create_buffer(ctx, size,
+ vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached,
+ vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent);
if(!(buf->memory_property_flags & vk::MemoryPropertyFlagBits::eHostVisible)) {
fprintf(stderr, "WARNING: failed to allocate %.2f MB of pinned memory\n",
static void ggml_vk_ensure_sync_staging_buffer(ggml_backend_vk_context * ctx, size_t size) {
if (ctx->sync_staging == nullptr || ctx->sync_staging->size < size) {
ggml_vk_destroy_buffer(ctx->sync_staging);
- ctx->sync_staging = ggml_vk_create_buffer_check(ctx, size, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached);
+ ctx->sync_staging = ggml_vk_create_buffer_check(ctx, size,
+ vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached,
+ vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent);
}
}
std::cerr << "ggml_vk_preallocate_buffers(qx_size: " << ctx->prealloc_size_qx << " qy_size: " << ctx->prealloc_size_qy << " x_size: " << ctx->prealloc_size_x << " y_size: " << ctx->prealloc_size_y << " split_k_size: " << ctx->prealloc_size_split_k << ")" << std::endl;
#endif
#if defined(GGML_VULKAN_RUN_TESTS)
- ctx->staging = ggml_vk_create_buffer_check(ctx, 100ul * 1024ul * 1024ul, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached);
+ ctx->staging = ggml_vk_create_buffer_check(ctx, 100ul * 1024ul * 1024ul,
+ vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached
+ vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent);
ggml_vk_test_transfer(ctx, 8192 * 1000, false);
ggml_vk_test_transfer(ctx, 8192 * 1000, true);
if (ctx->staging != nullptr) {
ggml_vk_destroy_buffer(ctx->staging);
}
- ctx->staging = ggml_vk_create_buffer_check(ctx, ctx->staging_size, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached);
+ ctx->staging = ggml_vk_create_buffer_check(ctx, ctx->staging_size,
+ vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached,
+ vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent);
}
}