replace the magic number 768 with the max work group size to support iGPU (llama/19920)

author Neo Zhang <redacted>

Fri, 27 Feb 2026 01:26:07 +0000 (09:26 +0800)

committer Georgi Gerganov <redacted>

Fri, 27 Feb 2026 10:04:54 +0000 (12:04 +0200)
author Neo Zhang <redacted>
Fri, 27 Feb 2026 01:26:07 +0000 (09:26 +0800)
committer Georgi Gerganov <redacted>
Fri, 27 Feb 2026 10:04:54 +0000 (12:04 +0200)
diff --git a/src/ggml-sycl/add-id.cpp b/src/ggml-sycl/add-id.cpp

index 00c073cf9373254dee53d11ad04376a29f19d4f1..8929017a999f206aa72e23aba73cb49812d91949 100644 (file)
--- a/src/ggml-sycl/add-id.cpp
+++ b/src/ggml-sycl/add-id.cpp
@@ -55,7 +55,11 @@ void ggml_sycl_add_id(ggml_backend_sycl_context& ctx, ggml_tensor* dst) {
    const int32_t* src2_d = (const int32_t*)src2->data;
    float* dst_d = (float*)dst->data;
  
-  int threads = std::min((int)ne00, 768);  // cols
+  const unsigned int max_work_group_size = ggml_sycl_info().max_work_group_sizes[ctx.device];
+  assert(work_group_size % (WARP_SIZE * WARP_SIZE) == 0);
+
+  int threads = std::min((unsigned int)ne00, max_work_group_size);  // cols
+
    ctx.stream()->parallel_for(
        sycl::nd_range<3>(
            sycl::range<3>(1, ne02, ne01) * sycl::range<3>(1, 1, threads),
author	Neo Zhang <redacted>
	Fri, 27 Feb 2026 01:26:07 +0000 (09:26 +0800)
committer	Georgi Gerganov <redacted>
	Fri, 27 Feb 2026 10:04:54 +0000 (12:04 +0200)