From: Neo Zhang Date: Thu, 2 Apr 2026 07:08:32 +0000 (+0800) Subject: sycl : fix llama_kv_cache hang when kv_cache is huge: 5GB (#21283) X-Git-Tag: upstream/0.0.8681~52 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=4888137b1736b706e39806025d24e4ca342f1e4a;p=pkg%2Fggml%2Fsources%2Fllama.cpp sycl : fix llama_kv_cache hang when kv_cache is huge: 5GB (#21283) --- diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp index 456b1699f..28be49397 100644 --- a/ggml/src/ggml-sycl/ggml-sycl.cpp +++ b/ggml/src/ggml-sycl/ggml-sycl.cpp @@ -569,9 +569,15 @@ static void ggml_backend_sycl_buffer_clear(ggml_backend_buffer_t buffer, SYCL_CHECK( CHECK_TRY_ERROR(dpct::get_current_device().queues_wait_and_throw())); - SYCL_CHECK(CHECK_TRY_ERROR((*stream) - .memset(ctx->dev_ptr, value, buffer->size) - .wait())); + constexpr size_t MAX_CHUNK = 2ULL << 30; // 2 GiB + for (size_t off = 0; off < buffer->size; off += MAX_CHUNK) { + size_t chunk = std::min(buffer->size - off, MAX_CHUNK); + SYCL_CHECK(CHECK_TRY_ERROR( + (*stream) + .memset(static_cast<char*>(ctx->dev_ptr) + off, value, chunk) + .wait() + )); + } } catch (sycl::exception const &exc) { std::cerr << exc.what() << "Exception caught at file:" << __FILE__