git.djapps.eu Git - pkg/ggml/sources/ggml/commitdiff
sycl : support to malloc memory on device more than 4GB, update the doc and script...
author: Neo Zhang <redacted>
Sat, 29 Nov 2025 12:59:44 +0000 (20:59 +0800)
committer: Georgi Gerganov <redacted>
Thu, 11 Dec 2025 13:32:50 +0000 (15:32 +0200)
Co-authored-by: Neo Zhang Jianyu <redacted>
src/ggml-sycl/CMakeLists.txt
src/ggml-sycl/cpy.cpp

index efd78b912cc65cb66fccb0a10b956e44eeafd8a7..88f29221bba94e6deefe40d35afe3894086d4c3f 100644 (file)
@@ -91,7 +91,10 @@ if (GGML_SYCL_F16)
     add_compile_definitions(GGML_SYCL_F16)
 endif()
 
-if (GGML_SYCL_TARGET STREQUAL "NVIDIA")
+if (GGML_SYCL_TARGET STREQUAL "INTEL")
+    add_compile_definitions(GGML_SYCL_WARP_SIZE=16)
+    target_link_options(ggml-sycl PRIVATE  -Xs   -ze-intel-greater-than-4GB-buffer-required)
+elseif (GGML_SYCL_TARGET STREQUAL "NVIDIA")
     add_compile_definitions(GGML_SYCL_WARP_SIZE=32)
 elseif (GGML_SYCL_TARGET STREQUAL "AMD")
     # INFO: Allowed Sub_group_sizes are not consistent through all
@@ -100,7 +103,8 @@ elseif (GGML_SYCL_TARGET STREQUAL "AMD")
     # Target archs tested working: gfx1030, gfx1031, (Only tested sub_group_size = 32)
     add_compile_definitions(GGML_SYCL_WARP_SIZE=32)
 else()
-    add_compile_definitions(GGML_SYCL_WARP_SIZE=16)
+    # default for other target
+    add_compile_definitions(GGML_SYCL_WARP_SIZE=32)
 endif()
 
 if (GGML_SYCL_GRAPH)
index 1ec99b0a5d1335f230e518b8158732074256590d..96709554cf692a9e28306496b1779fb36b31cf1a 100644 (file)
@@ -515,9 +515,6 @@ void ggml_sycl_cpy(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, co
     const int64_t ne = ggml_nelements(src0);
     GGML_ASSERT(ne == ggml_nelements(src1));
 
-    GGML_ASSERT(ggml_nbytes(src0) <= INT_MAX);
-    GGML_ASSERT(ggml_nbytes(src1) <= INT_MAX);
-
     GGML_TENSOR_BINARY_OP_LOCALS01;
 
     SYCL_CHECK(ggml_sycl_set_device(ctx.device));