From: Reese Levine Date: Wed, 18 Feb 2026 23:06:29 +0000 (-0700) Subject: ggml webgpu: Fix bug in dispatching large matrix-vector multiplication (llama/19535) X-Git-Tag: v0.9.8~121 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=e10bfc0db2caae8bbf396b33a90b86668d5dbe1d;p=pkg%2Fggml%2Fsources%2Fggml ggml webgpu: Fix bug in dispatching large matrix-vector multiplication (llama/19535) * Fix bug in dispatching large matrix-vector multiplication --- diff --git a/src/ggml-webgpu/ggml-webgpu.cpp b/src/ggml-webgpu/ggml-webgpu.cpp index 17bb2f47..b5fee480 100644 --- a/src/ggml-webgpu/ggml-webgpu.cpp +++ b/src/ggml-webgpu/ggml-webgpu.cpp @@ -1121,7 +1121,8 @@ static webgpu_command ggml_webgpu_mul_mat(webgpu_context & ctx, uint32_t batches = dst->ne[2] * dst->ne[3]; uint32_t output_groups = CEIL_DIV(dst->ne[0], decisions->outputs_per_wg); uint32_t total_wg = output_groups * batches; - wg_x = total_wg % ctx->global_ctx->capabilities.limits.maxComputeWorkgroupsPerDimension; + // TODO: split large sizes into multiple batches to avoid way over-provisioning workgroups + wg_x = std::min(total_wg, ctx->global_ctx->capabilities.limits.maxComputeWorkgroupsPerDimension); wg_y = CEIL_DIV(total_wg, ctx->global_ctx->capabilities.limits.maxComputeWorkgroupsPerDimension); } else if (use_fast) { auto decisions = static_cast(pipeline.context.get());