sycl : Overcoming workaround for mmap() allocation on Windows (#13482)

author Nicolò Scipione <redacted>

Tue, 20 May 2025 00:54:43 +0000 (02:54 +0200)

committer GitHub <redacted>

Tue, 20 May 2025 00:54:43 +0000 (08:54 +0800)
author Nicolò Scipione <redacted>
Tue, 20 May 2025 00:54:43 +0000 (02:54 +0200)
committer GitHub <redacted>
Tue, 20 May 2025 00:54:43 +0000 (08:54 +0800)
diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp

index 5ff7fa13db0bef26414e8a2c4ce018f0d976e750..2f61be714051c5a886750e94f94a89ce0b3daf1f 100644 (file)
--- a/ggml/src/ggml-sycl/ggml-sycl.cpp
+++ b/ggml/src/ggml-sycl/ggml-sycl.cpp
@@ -385,16 +385,17 @@ static void ggml_backend_sycl_buffer_set_tensor(ggml_backend_buffer_t buffer,
      ggml_backend_sycl_buffer_context * ctx = ( ggml_backend_sycl_buffer_context *)buffer->context;
      ggml_sycl_set_device(ctx->device);
      auto stream = &(dpct::dev_mgr::instance().get_device(ctx->device).default_queue());
-    SYCL_CHECK(
-        CHECK_TRY_ERROR(dpct::dev_mgr::instance().get_device(ctx->device).queues_wait_and_throw()));
+    SYCL_CHECK(CHECK_TRY_ERROR(dpct::dev_mgr::instance().get_device(ctx->device).queues_wait_and_throw()));
+#ifndef _WIN32
      // Note: Use host buffer to save the data from mmap(), then copy to device. It's workaround for mmap() issue on PVC GPU.
      // This function will be called during load model from disk. Use memory buffer replace dynamic won't save more time and brings potential memory leak risk here.
-    char* host_buf = (char*)malloc(size);
+    char * host_buf = (char *) malloc(size);
      memcpy(host_buf, data, size);
-    SYCL_CHECK(
-        CHECK_TRY_ERROR((*stream).memcpy((char *)tensor->data + offset, host_buf, size)
-                             .wait()));
+    SYCL_CHECK(CHECK_TRY_ERROR((*stream).memcpy((char *) tensor->data + offset, host_buf, size).wait()));
      free(host_buf);
+#else
+    SYCL_CHECK(CHECK_TRY_ERROR((*stream).memcpy((char *) tensor->data + offset, data, size).wait()));
+#endif
  }
  catch (sycl::exception const &exc) {
    std::cerr << exc.what() << "Exception caught at file:" << __FILE__
diff --git a/tools/llama-bench/README.md b/tools/llama-bench/README.md

index 0479f81a30b5578fe2bf692f265abf905826a816..31a27308743469a00190e6e184b5978dc781a5ab 100644 (file)
--- a/tools/llama-bench/README.md
+++ b/tools/llama-bench/README.md
@@ -80,10 +80,6 @@ Using the `-d <n>` option, each test can be run at a specified context depth, pr
  
  For a description of the other options, see the [main example](../main/README.md).
  
-Note:
-
-- When using SYCL backend, there would be hang issue in some cases. Please set `--mmp 0`.
-
  ## Examples
  
  ### Text generation with different models
author	Nicolò Scipione <redacted>
	Tue, 20 May 2025 00:54:43 +0000 (02:54 +0200)
committer	GitHub <redacted>
	Tue, 20 May 2025 00:54:43 +0000 (08:54 +0800)
ggml/src/ggml-sycl/ggml-sycl.cpp		patch | blob | history
tools/llama-bench/README.md		patch | blob | history