CUDA: static assert to prevent misuse of memcpy_1 (#17198)

author Johannes Gäßler <redacted>

Wed, 12 Nov 2025 22:13:55 +0000 (23:13 +0100)

committer GitHub <redacted>

Wed, 12 Nov 2025 22:13:55 +0000 (23:13 +0100)
author Johannes Gäßler <redacted>
Wed, 12 Nov 2025 22:13:55 +0000 (23:13 +0100)
committer GitHub <redacted>
Wed, 12 Nov 2025 22:13:55 +0000 (23:13 +0100)
diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml/src/ggml-cuda/common.cuh

index ca876459d404da06bd96b8b91a77769790b65eca..25e9308d756c2ed0be3cc1204e6c9fbf7bd0f7fc 100644 (file)
--- a/ggml/src/ggml-cuda/common.cuh
+++ b/ggml/src/ggml-cuda/common.cuh
@@ -586,6 +586,12 @@ static __device__ __forceinline__ void ggml_cuda_mad(half2 & acc, const half2 v,
  //     If dst and src point at different address spaces then they are guaranteed to not be aliased.
  template <int nbytes, int alignment = 0>
  static __device__ __forceinline__ void ggml_cuda_memcpy_1(void * __restrict__ dst, const void * __restrict__ src) {
+    static_assert(
+        nbytes <= ggml_cuda_get_max_cpy_bytes() || alignment == 0,
+        "You are misusing the alignment parameter for ggml_cuda_memcpy_1. "
+        "The intent is for the parameter is only as a workaround if either one of the pointers is not properly aligned. "
+        "If you use it to do more bytes per copy than ggml_cuda_max_cpy_bytes() the reads and writes may not be coalesced. "
+        "Call ggml_cuda_memcpy_1 in a loop instead.");
      if constexpr (alignment != 0) {
          static_assert(nbytes % alignment == 0, "bad alignment");
      }
author	Johannes Gäßler <redacted>
	Wed, 12 Nov 2025 22:13:55 +0000 (23:13 +0100)
committer	GitHub <redacted>
	Wed, 12 Nov 2025 22:13:55 +0000 (23:13 +0100)