bool prealloc_x_need_sync, prealloc_y_need_sync, prealloc_split_k_need_sync;
vk_context_ref compute_ctx;
- vk_context_ref transfer_ctx;
std::vector<vk_context_ref> tensor_ctxs;
uint32_t pipeline_descriptor_set_requirements {};
vk_command_pool compute_cmd_pool;
- vk_command_pool transfer_cmd_pool;
// number of additional consecutive nodes that are being fused with the
// node currently being processed
ctx->almost_ready_fence = ctx->device->device.createFence({});
ctx->compute_cmd_pool.init(ctx->device, &ctx->device->compute_queue);
- ctx->transfer_cmd_pool.init(ctx->device, &ctx->device->transfer_queue);
if (vk_perf_logger_enabled) {
ctx->perf_logger = std::unique_ptr<vk_perf_logger>(new vk_perf_logger());
free(d_chk);
ggml_vk_command_pool_cleanup(ctx->device, ctx->compute_cmd_pool);
- ggml_vk_command_pool_cleanup(ctx->device, ctx->transfer_cmd_pool);
ggml_vk_destroy_buffer(d_X);
ggml_vk_destroy_buffer(d_Y);
ggml_vk_submit(subctx, {});
ctx->submit_pending = true;
ggml_vk_synchronize(ctx);
+ GGML_ASSERT(ctx->compute_ctx.expired());
ggml_vk_ctx_begin(ctx->device, subctx);
+ ctx->compute_ctx = subctx;
}
if (ctx->prealloc_y == nullptr || (ctx->prealloc_size_y > 0 && ctx->prealloc_y->size < ctx->prealloc_size_y)) {
ggml_vk_destroy_buffer(ctx->prealloc_y);
}
ctx->prealloc_y = ggml_vk_create_buffer_device(ctx->device, ctx->prealloc_size_y);
+ ctx->prealloc_y_last_tensor_used = nullptr;
}
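The preallocation hunks follow a grow-only pattern: a buffer is recreated only when it is missing or smaller than the requested size, so once a graph's high-water mark is reached, steady-state evaluation stops reallocating. A minimal sketch of the idiom, with `buffer`/`create_buffer` as stand-ins for the ggml-vulkan types, not the real API:

```cpp
#include <cstddef>
#include <cstdio>
#include <memory>

struct buffer { size_t size; };
using buffer_ptr = std::shared_ptr<buffer>;

// Stand-in for ggml_vk_create_buffer_device.
static buffer_ptr create_buffer(size_t size) {
    return std::make_shared<buffer>(buffer{size});
}

// Recreate `buf` only when it is missing or too small: buffers only grow,
// so repeated evaluations stop reallocating once the high-water mark is hit.
static void ensure_capacity(buffer_ptr &buf, size_t required) {
    if (buf == nullptr || (required > 0 && buf->size < required)) {
        buf.reset();                    // destroy the undersized buffer first
        buf = create_buffer(required);  // then allocate at the new size
    }
}

int main() {
    buffer_ptr prealloc_y;
    ensure_capacity(prealloc_y, 1024);  // allocates
    ensure_capacity(prealloc_y, 512);   // no-op: existing buffer is big enough
    ensure_capacity(prealloc_y, 4096);  // grows
    printf("%zu\n", prealloc_y->size);  // 4096
}
```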
if (ctx->prealloc_split_k == nullptr || (ctx->prealloc_size_split_k > 0 && ctx->prealloc_split_k->size < ctx->prealloc_size_split_k)) {
VK_LOG_MEMORY("ggml_vk_preallocate_buffers(split_k_size: " << ctx->prealloc_size_split_k << ")");
ctx->prealloc_x_need_sync = ctx->prealloc_y_need_sync = ctx->prealloc_split_k_need_sync = false;
ggml_vk_command_pool_cleanup(ctx->device, ctx->compute_cmd_pool);
- ggml_vk_command_pool_cleanup(ctx->device, ctx->transfer_cmd_pool);
for (size_t i = 0; i < ctx->gc.semaphores.size(); i++) {
ctx->device->device.destroySemaphore({ ctx->gc.semaphores[i].s });
static void ggml_vk_cleanup(ggml_backend_vk_context * ctx) {
VK_LOG_DEBUG("ggml_vk_cleanup(" << ctx->name << ")");
// discard any unsubmitted command buffers
- ctx->transfer_ctx.reset();
+ ctx->compute_ctx.reset();
// wait for any pending command buffers to finish
ggml_vk_synchronize(ctx);
ctx->descriptor_sets.clear();
ctx->compute_cmd_pool.destroy(ctx->device->device);
- ctx->transfer_cmd_pool.destroy(ctx->device->device);
if (vk_perf_logger_enabled) {
ctx->perf_logger->print_timings(true);
}
ggml_backend_vk_buffer_context * buf_ctx = (ggml_backend_vk_buffer_context *)tensor->buffer->context;
- vk_context transfer_ctx;
+ vk_context compute_ctx;
- if (ctx->transfer_ctx.expired()) {
+ if (ctx->compute_ctx.expired()) {
- // Initialize new transfer context
+ // Initialize new compute context
- transfer_ctx = ggml_vk_create_context(ctx, ctx->compute_cmd_pool);
- ctx->transfer_ctx = transfer_ctx;
- ggml_vk_ctx_begin(ctx->device, transfer_ctx);
+ compute_ctx = ggml_vk_create_context(ctx, ctx->compute_cmd_pool);
+ ctx->compute_ctx = compute_ctx;
+ ggml_vk_ctx_begin(ctx->device, compute_ctx);
} else {
- transfer_ctx = ctx->transfer_ctx.lock();
+ compute_ctx = ctx->compute_ctx.lock();
}
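The `expired()`/`lock()` sequence that now guards every transfer entry point is the standard `std::weak_ptr` caching idiom: the backend context keeps only a weak reference to the live command context, so submitting and resetting it elsewhere cannot leave a dangling handle, and the next caller transparently creates a fresh one. A self-contained sketch (illustrative names, not the ggml API):

```cpp
#include <iostream>
#include <memory>

struct context { int id; };

// The owner caches only a weak reference, so the context can be released
// after submission without leaving a dangling pointer behind.
static std::weak_ptr<context> cached_ctx;

static std::shared_ptr<context> acquire_context() {
    if (cached_ctx.expired()) {
        // No live context: create one and cache a weak reference to it.
        auto ctx = std::make_shared<context>(context{42});
        cached_ctx = ctx;
        return ctx;
    }
    return cached_ctx.lock();  // reuse the context that is still alive
}

int main() {
    auto a = acquire_context();
    auto b = acquire_context();                  // same object, weak_ptr still live
    std::cout << (a == b) << '\n';               // 1
    a.reset();
    b.reset();                                   // last owner gone, context destroyed
    std::cout << acquire_context()->id << '\n';  // a fresh context: 42
}
```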
vk_buffer buf = buf_ctx->dev_buffer;
auto dst_offset = vk_tensor_offset(tensor) + tensor->view_offs + offset;
- bool ret = ggml_vk_buffer_write_async(transfer_ctx, buf, dst_offset, data, size);
+ bool ret = ggml_vk_buffer_write_async(compute_ctx, buf, dst_offset, data, size);
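+ // If that failed, copy synchronously through a staging buffer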
if (!ret) {
ggml_vk_ensure_sync_staging_buffer(ctx, size);
- ggml_vk_sync_buffers(nullptr, transfer_ctx);
+ ggml_vk_sync_buffers(nullptr, compute_ctx);
vk::BufferCopy buffer_cpy;
buffer_cpy.srcOffset = 0;
buffer_cpy.dstOffset = dst_offset;
buffer_cpy.size = size;
- transfer_ctx->s->buffer.copyBuffer(ctx->sync_staging->buffer, buf->buffer, { buffer_cpy });
- deferred_memcpy(ctx->sync_staging->ptr, data, size, &transfer_ctx->in_memcpys);
+ compute_ctx->s->buffer.copyBuffer(ctx->sync_staging->buffer, buf->buffer, { buffer_cpy });
+ deferred_memcpy(ctx->sync_staging->ptr, data, size, &compute_ctx->in_memcpys);
ggml_vk_synchronize(ctx);
}
}
ggml_backend_vk_buffer_context * buf_ctx = (ggml_backend_vk_buffer_context *)tensor->buffer->context;
- vk_context transfer_ctx;
+ vk_context compute_ctx;
- if (ctx->transfer_ctx.expired()) {
+ if (ctx->compute_ctx.expired()) {
- // Initialize new transfer context
+ // Initialize new compute context
- transfer_ctx = ggml_vk_create_context(ctx, ctx->compute_cmd_pool);
- ctx->transfer_ctx = transfer_ctx;
- ggml_vk_ctx_begin(ctx->device, transfer_ctx);
+ compute_ctx = ggml_vk_create_context(ctx, ctx->compute_cmd_pool);
+ ctx->compute_ctx = compute_ctx;
+ ggml_vk_ctx_begin(ctx->device, compute_ctx);
} else {
- transfer_ctx = ctx->transfer_ctx.lock();
+ compute_ctx = ctx->compute_ctx.lock();
}
vk_buffer buf = buf_ctx->dev_buffer;
auto src_offset = vk_tensor_offset(tensor) + tensor->view_offs + offset;
- bool ret = ggml_vk_buffer_read_async(transfer_ctx, buf, src_offset, data, size);
+ bool ret = ggml_vk_buffer_read_async(compute_ctx, buf, src_offset, data, size);
// If that failed, copy synchronously through a staging buffer
if (!ret) {
ggml_vk_ensure_sync_staging_buffer(ctx, size);
- ggml_vk_sync_buffers(nullptr, transfer_ctx);
+ ggml_vk_sync_buffers(nullptr, compute_ctx);
vk::BufferCopy buffer_cpy;
buffer_cpy.srcOffset = src_offset;
buffer_cpy.dstOffset = 0;
buffer_cpy.size = size;
- transfer_ctx->s->buffer.copyBuffer(buf->buffer, ctx->sync_staging->buffer, { buffer_cpy });
- deferred_memcpy(data, ctx->sync_staging->ptr, size, &transfer_ctx->out_memcpys);
+ compute_ctx->s->buffer.copyBuffer(buf->buffer, ctx->sync_staging->buffer, { buffer_cpy });
+ deferred_memcpy(data, ctx->sync_staging->ptr, size, &compute_ctx->out_memcpys);
ggml_vk_synchronize(ctx);
}
}
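Both transfer directions share the same fallback: when the direct async copy fails (e.g. the buffer is not host-visible), the copy is routed through `sync_staging` and the host-side `memcpy` is deferred; uploads go into `in_memcpys` (flushed just before submit), downloads into `out_memcpys` (flushed after the device has finished). A minimal sketch of that bookkeeping, with stand-in types rather than the ggml ones:

```cpp
#include <cstring>
#include <vector>

// One host memcpy whose execution is postponed: uploads run just before the
// command buffer is submitted, downloads just after the GPU work completes.
struct deferred_memcpy {
    void       *dst;
    const void *src;
    size_t      n;
};

static void defer(void *dst, const void *src, size_t n,
                  std::vector<deferred_memcpy> *list) {
    list->push_back({dst, src, n});
}

static void flush(std::vector<deferred_memcpy> &list) {
    for (auto &cpy : list) {
        memcpy(cpy.dst, cpy.src, cpy.n);
    }
    list.clear();
}

int main() {
    std::vector<deferred_memcpy> in_memcpys;
    char staging[4] = {};
    const char data[4] = {1, 2, 3, 4};
    defer(staging, data, sizeof data, &in_memcpys);
    // ... record GPU copy commands that read from `staging` ...
    flush(in_memcpys);  // just before submit, the data lands in staging
}
```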
ggml_backend_vk_buffer_context * src_buf_ctx = (ggml_backend_vk_buffer_context *)src->buffer->context;
ggml_backend_vk_buffer_context * dst_buf_ctx = (ggml_backend_vk_buffer_context *)dst->buffer->context;
- vk_context transfer_ctx;
+ vk_context compute_ctx;
- if (ctx->transfer_ctx.expired()) {
+ if (ctx->compute_ctx.expired()) {
- // Initialize new transfer context
+ // Initialize new compute context
- transfer_ctx = ggml_vk_create_context(ctx, ctx->compute_cmd_pool);
- ctx->transfer_ctx = transfer_ctx;
- ggml_vk_ctx_begin(ctx->device, transfer_ctx);
+ compute_ctx = ggml_vk_create_context(ctx, ctx->compute_cmd_pool);
+ ctx->compute_ctx = compute_ctx;
+ ggml_vk_ctx_begin(ctx->device, compute_ctx);
} else {
- transfer_ctx = ctx->transfer_ctx.lock();
+ compute_ctx = ctx->compute_ctx.lock();
}
vk_buffer src_buf = src_buf_ctx->dev_buffer;
vk_buffer dst_buf = dst_buf_ctx->dev_buffer;
- ggml_vk_buffer_copy_async(transfer_ctx, dst_buf, vk_tensor_offset(dst) + dst->view_offs, src_buf, vk_tensor_offset(src) + src->view_offs, ggml_nbytes(src));
+ ggml_vk_buffer_copy_async(compute_ctx, dst_buf, vk_tensor_offset(dst) + dst->view_offs, src_buf, vk_tensor_offset(src) + src->view_offs, ggml_nbytes(src));
return true;
}
static void ggml_vk_synchronize(ggml_backend_vk_context * ctx) {
VK_LOG_DEBUG("ggml_vk_synchronize()");
- bool do_transfer = !ctx->transfer_ctx.expired();
+ bool do_transfer = !ctx->compute_ctx.expired();
- vk_context transfer_ctx;
+ vk_context compute_ctx;
if (do_transfer) {
- transfer_ctx = ctx->transfer_ctx.lock();
+ compute_ctx = ctx->compute_ctx.lock();
- ggml_vk_ctx_end(transfer_ctx);
+ ggml_vk_ctx_end(compute_ctx);
- for (auto& cpy : transfer_ctx->in_memcpys) {
+ for (auto& cpy : compute_ctx->in_memcpys) {
memcpy(cpy.dst, cpy.src, cpy.n);
}
- ggml_vk_submit(transfer_ctx, {});
+ ggml_vk_submit(compute_ctx, {});
ctx->submit_pending = true;
}
}
if (do_transfer) {
- for (auto& cpy : transfer_ctx->out_memcpys) {
+ for (auto& cpy : compute_ctx->out_memcpys) {
memcpy(cpy.dst, cpy.src, cpy.n);
}
- ctx->transfer_ctx.reset();
+ ctx->compute_ctx.reset();
}
}
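`ggml_vk_synchronize` now drives both flush points: `in_memcpys` before submission, so staging data is in place when the recorded copy executes, and `out_memcpys` only after the wait, when the staging buffer actually holds the results. A runnable toy model of that ordering, with `std::async`/`wait()` standing in for queue submission and the fence wait:

```cpp
#include <cstring>
#include <future>

int main() {
    char staging[4] = {};
    char device[4]  = {};
    char result[4]  = {};
    const char input[4] = {7, 8, 9, 10};

    // in_memcpys analogue: host -> staging, flushed before submission.
    memcpy(staging, input, sizeof staging);

    // "Submit": the device reads staging, then writes results back to it.
    auto fence = std::async(std::launch::async, [&] {
        memcpy(device, staging, sizeof device);
        memcpy(staging, device, sizeof staging);
    });

    fence.wait();  // waitForFences analogue: block until the work is done

    // out_memcpys analogue: staging -> host, only valid after the wait.
    memcpy(result, staging, sizeof result);
    return result[0] == 7 ? 0 : 1;
}
```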
ggml_vk_submit(compute_ctx, ctx->device->fence);
VK_CHECK(ctx->device->device.waitForFences({ ctx->device->fence }, true, UINT64_MAX), "GGML_VULKAN_PERF waitForFences");
ctx->device->device.resetFences({ ctx->device->fence });
+ ctx->compute_ctx.reset();
// Get the results and pass them to the logger
std::vector<uint64_t> timestamps(cgraph->n_nodes + 1);
ggml_backend_vk_context * ctx = (ggml_backend_vk_context *)backend->context;
vk_event *vkev = (vk_event *)event->context;
- vk_context transfer_ctx;
+ vk_context compute_ctx;
- if (ctx->transfer_ctx.expired()) {
+ if (ctx->compute_ctx.expired()) {
- // Initialize new transfer context
+ // Initialize new compute context
- transfer_ctx = ggml_vk_create_context(ctx, ctx->compute_cmd_pool);
- ctx->transfer_ctx = transfer_ctx;
- ggml_vk_ctx_begin(ctx->device, transfer_ctx);
+ compute_ctx = ggml_vk_create_context(ctx, ctx->compute_cmd_pool);
+ ctx->compute_ctx = compute_ctx;
+ ggml_vk_ctx_begin(ctx->device, compute_ctx);
} else {
- transfer_ctx = ctx->transfer_ctx.lock();
+ compute_ctx = ctx->compute_ctx.lock();
}
// the backend interface doesn't have an explicit reset, so reset it here
ctx->device->device.resetEvent(vkev->event);
ctx->device->device.resetFences({ vkev->fence });
- ggml_vk_set_event(transfer_ctx, vkev->event);
+ ggml_vk_set_event(compute_ctx, vkev->event);
- ggml_vk_ctx_end(transfer_ctx);
+ ggml_vk_ctx_end(compute_ctx);
- ggml_vk_submit(transfer_ctx, {vkev->fence});
+ ggml_vk_submit(compute_ctx, {vkev->fence});
ctx->submit_pending = true;
- ctx->transfer_ctx.reset();
+ ctx->compute_ctx.reset();
}
static void ggml_backend_vk_event_wait(ggml_backend_t backend, ggml_backend_event_t event) {
ggml_backend_vk_context * ctx = (ggml_backend_vk_context *)backend->context;
vk_event *vkev = (vk_event *)event->context;
- vk_context transfer_ctx;
+ vk_context compute_ctx;
- if (ctx->transfer_ctx.expired()) {
+ if (ctx->compute_ctx.expired()) {
- // Initialize new transfer context
+ // Initialize new compute context
- transfer_ctx = ggml_vk_create_context(ctx, ctx->compute_cmd_pool);
- ctx->transfer_ctx = transfer_ctx;
- ggml_vk_ctx_begin(ctx->device, transfer_ctx);
+ compute_ctx = ggml_vk_create_context(ctx, ctx->compute_cmd_pool);
+ ctx->compute_ctx = compute_ctx;
+ ggml_vk_ctx_begin(ctx->device, compute_ctx);
} else {
- transfer_ctx = ctx->transfer_ctx.lock();
+ compute_ctx = ctx->compute_ctx.lock();
}
- ggml_vk_wait_events(transfer_ctx, {vkev->event});
- ggml_vk_ctx_end(transfer_ctx);
- ctx->transfer_ctx.reset();
+ ggml_vk_wait_events(compute_ctx, {vkev->event});
+ ggml_vk_ctx_end(compute_ctx);
+ ctx->compute_ctx.reset();
}
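The record/wait pair mirrors `vk::Event` semantics: the recording side sets the event once its commands are submitted, the waiting side records a wait on it, and reuse requires an explicit reset (hence the `resetEvent` call, since the backend interface has no reset hook). A host-side analogue of that split, using `std::condition_variable` rather than the Vulkan API:

```cpp
#include <condition_variable>
#include <iostream>
#include <mutex>
#include <thread>

// Host-side analogue of the vk::Event split: one side sets the event after
// its work is queued, the other blocks until it fires; reuse needs a reset.
struct event_flag {
    std::mutex m;
    std::condition_variable cv;
    bool set = false;

    void signal() {  // ggml_vk_set_event analogue
        std::lock_guard<std::mutex> lock(m);
        set = true;
        cv.notify_all();
    }
    void wait() {    // ggml_vk_wait_events analogue
        std::unique_lock<std::mutex> lock(m);
        cv.wait(lock, [this] { return set; });
    }
    void reset() {   // resetEvent analogue
        std::lock_guard<std::mutex> lock(m);
        set = false;
    }
};

int main() {
    event_flag ev;
    std::thread consumer([&] { ev.wait(); std::cout << "event observed\n"; });
    ev.signal();
    consumer.join();
}
```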
// TODO: enable async and synchronize