sycl : support allocating more than 4GB of memory on device, update the doc and script...

author Neo Zhang <redacted>

Sat, 29 Nov 2025 12:59:44 +0000 (20:59 +0800)

committer Georgi Gerganov <redacted>

Thu, 11 Dec 2025 13:32:50 +0000 (15:32 +0200)
author Neo Zhang <redacted>
Sat, 29 Nov 2025 12:59:44 +0000 (20:59 +0800)
committer Georgi Gerganov <redacted>
Thu, 11 Dec 2025 13:32:50 +0000 (15:32 +0200)
diff --git a/src/ggml-sycl/CMakeLists.txt b/src/ggml-sycl/CMakeLists.txt

index efd78b912cc65cb66fccb0a10b956e44eeafd8a7..88f29221bba94e6deefe40d35afe3894086d4c3f 100644 (file)
--- a/src/ggml-sycl/CMakeLists.txt
+++ b/src/ggml-sycl/CMakeLists.txt
@@ -91,7 +91,10 @@ if (GGML_SYCL_F16)
      add_compile_definitions(GGML_SYCL_F16)
  endif()
  
-if (GGML_SYCL_TARGET STREQUAL "NVIDIA")
+if (GGML_SYCL_TARGET STREQUAL "INTEL")
+    add_compile_definitions(GGML_SYCL_WARP_SIZE=16)
+    target_link_options(ggml-sycl PRIVATE  -Xs   -ze-intel-greater-than-4GB-buffer-required)
+elseif (GGML_SYCL_TARGET STREQUAL "NVIDIA")
      add_compile_definitions(GGML_SYCL_WARP_SIZE=32)
  elseif (GGML_SYCL_TARGET STREQUAL "AMD")
      # INFO: Allowed Sub_group_sizes are not consistent through all
@@ -100,7 +103,8 @@ elseif (GGML_SYCL_TARGET STREQUAL "AMD")
      # Target archs tested working: gfx1030, gfx1031, (Only tested sub_group_size = 32)
      add_compile_definitions(GGML_SYCL_WARP_SIZE=32)
  else()
-    add_compile_definitions(GGML_SYCL_WARP_SIZE=16)
+    # default for other target
+    add_compile_definitions(GGML_SYCL_WARP_SIZE=32)
  endif()
  
  if (GGML_SYCL_GRAPH)
diff --git a/src/ggml-sycl/cpy.cpp b/src/ggml-sycl/cpy.cpp

index 1ec99b0a5d1335f230e518b8158732074256590d..96709554cf692a9e28306496b1779fb36b31cf1a 100644 (file)
--- a/src/ggml-sycl/cpy.cpp
+++ b/src/ggml-sycl/cpy.cpp
@@ -515,9 +515,6 @@ void ggml_sycl_cpy(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, co
      const int64_t ne = ggml_nelements(src0);
      GGML_ASSERT(ne == ggml_nelements(src1));
  
-    GGML_ASSERT(ggml_nbytes(src0) <= INT_MAX);
-    GGML_ASSERT(ggml_nbytes(src1) <= INT_MAX);
-
      GGML_TENSOR_BINARY_OP_LOCALS01;
  
      SYCL_CHECK(ggml_sycl_set_device(ctx.device));
author	Neo Zhang <redacted>
	Sat, 29 Nov 2025 12:59:44 +0000 (20:59 +0800)
committer	Georgi Gerganov <redacted>
	Thu, 11 Dec 2025 13:32:50 +0000 (15:32 +0200)
src/ggml-sycl/CMakeLists.txt		patch \| blob \| history
src/ggml-sycl/cpy.cpp		patch \| blob \| history