From: 0cc4m Date: Wed, 31 Jan 2024 10:44:19 +0000 (+0100) Subject: Vulkan Fixes (llama/5223) X-Git-Tag: upstream/0.0.1642~1006 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=90cfbac0db4c0d6b868e438f6b0473b4f518a117;p=pkg%2Fggml%2Fsources%2Fggml Vulkan Fixes (llama/5223) * Fix Vulkan F16 models * Fix Vulkan context shift crash * Add Vulkan to common.cpp dump_non_result_info_yaml function * Fix bug in Vulkan CPY op * Fix small matrix multiplication errors in AMD GPUs on Windows or with amdvlk Co-authored-by: Engininja2 --------- Co-authored-by: Engininja2 --- diff --git a/ggml-vulkan.cpp b/ggml-vulkan.cpp index 1d93ec6b..bccc40bf 100644 --- a/ggml-vulkan.cpp +++ b/ggml-vulkan.cpp @@ -817,7 +817,7 @@ static void ggml_vk_load_shaders() { // mulmat std::initializer_list warptile_l = { 128, 128, 128, 16, vk_device.subgroup_size * 2, 64, 2, 4, 4, vk_device.subgroup_size }; std::initializer_list warptile_m = { 128, 64, 64, 16, vk_device.subgroup_size, 32, 2, 4, 2, vk_device.subgroup_size }; - std::initializer_list warptile_s = { vk_device.subgroup_size, 32, 32, 8, 32, 32, 2, 2, 2, vk_device.subgroup_size }; + std::initializer_list warptile_s = { vk_device.subgroup_size, 32, 32, 16, 32, 32, 2, 2, 2, vk_device.subgroup_size }; std::array l_wg_denoms = {128, 128, 1 }; std::array m_wg_denoms = { 64, 64, 1 }; @@ -2873,7 +2873,8 @@ static void ggml_vk_op_f32(vk_context * ctx, const ggml_tensor * src0, const ggm if (op == GGML_OP_CPY) { GGML_ASSERT(!transfer_src0); GGML_ASSERT(!transfer_src1); - d_sz = dst->ne[1] * dst->nb[1]; + x_sz = ggml_nbytes(src0); + d_sz = ggml_nbytes(dst); if (extra->offset + d_sz >= d_D->size) { d_sz = VK_WHOLE_SIZE; @@ -4556,8 +4557,15 @@ GGML_CALL static bool ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml } ggml_vk_preallocate_buffers(); + int last_node = cgraph->n_nodes - 1; + + // If the last op in the cgraph isn't backend GPU, the command buffer doesn't get closed properly + while (last_node > 0 && cgraph->nodes[last_node]->backend != GGML_BACKEND_GPU) { + last_node -= 1; + } + for (int i = 0; i < cgraph->n_nodes; i++) { - ggml_vk_build_graph(cgraph->nodes[i], i == cgraph->n_nodes - 1); + ggml_vk_build_graph(cgraph->nodes[i], i == last_node); } ggml_compute_params params = {};