From: Nicolò Scipione Date: Tue, 20 May 2025 00:54:43 +0000 (+0200) Subject: sycl : Overcoming workaround for mmap() allocation on Windows (llama/13482) X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=ee3f177cbaa2e505e424d35cb5695c72149d5d7e;p=pkg%2Fggml%2Fsources%2Fwhisper.cpp sycl : Overcoming workaround for mmap() allocation on Windows (llama/13482) * Remove mmap workaround on Windows After some testing I found that mmap is supported on Windows and for many GPUs on Linux. Therefore, I removed the workaround on Windows, since it is not necessary there. * Update llama-bench README The SYCL backend introduced a workaround that allows llama-bench to run even without specifying the `--mmap 0` flag --- diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp index 5ff7fa13..2f61be71 100644 --- a/ggml/src/ggml-sycl/ggml-sycl.cpp +++ b/ggml/src/ggml-sycl/ggml-sycl.cpp @@ -385,16 +385,17 @@ static void ggml_backend_sycl_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml_backend_sycl_buffer_context * ctx = ( ggml_backend_sycl_buffer_context *)buffer->context; ggml_sycl_set_device(ctx->device); auto stream = &(dpct::dev_mgr::instance().get_device(ctx->device).default_queue()); - SYCL_CHECK( - CHECK_TRY_ERROR(dpct::dev_mgr::instance().get_device(ctx->device).queues_wait_and_throw())); + SYCL_CHECK(CHECK_TRY_ERROR(dpct::dev_mgr::instance().get_device(ctx->device).queues_wait_and_throw())); +#ifndef _WIN32 // Note: Use host buffer to save the data from mmap(), then copy to device. It's workaround for mmap() issue on PVC GPU. // This function will be called during load model from disk. Use memory buffer replace dynamic won't save more time and brings potential memory leak risk here.
- char* host_buf = (char*)malloc(size); + char * host_buf = (char *) malloc(size); memcpy(host_buf, data, size); - SYCL_CHECK( - CHECK_TRY_ERROR((*stream).memcpy((char *)tensor->data + offset, host_buf, size) - .wait())); + SYCL_CHECK(CHECK_TRY_ERROR((*stream).memcpy((char *) tensor->data + offset, host_buf, size).wait())); free(host_buf); +#else + SYCL_CHECK(CHECK_TRY_ERROR((*stream).memcpy((char *) tensor->data + offset, data, size).wait())); +#endif } catch (sycl::exception const &exc) { std::cerr << exc.what() << "Exception caught at file:" << __FILE__