vulkan: support noncontig i32 copy (llama/17328)

author Jeff Bolz <redacted>

Tue, 18 Nov 2025 06:41:24 +0000 (00:41 -0600)

committer Georgi Gerganov <redacted>

Fri, 12 Dec 2025 15:53:03 +0000 (17:53 +0200)
author Jeff Bolz <redacted>
Tue, 18 Nov 2025 06:41:24 +0000 (00:41 -0600)
committer Georgi Gerganov <redacted>
Fri, 12 Dec 2025 15:53:03 +0000 (17:53 +0200)
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp

index 75b76e593bbc0ff05792dd40da0abf5c0eed7863..11262c19894c818e7d8605e83eac6d532315e2bb 100644 (file)
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -13644,10 +13644,11 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm
                  }
  
                  // We can handle copying from a type to the same type if it's
-                // contiguous (memcpy). We use f16 or f32 shaders to do the copy,
+                // either not quantized or is quantized and contiguous.
+                // We use f16 or f32 shaders to do the copy,
                  // so the type/block size must be a multiple of 4.
                  if (src0_type == src1_type &&
-                    ggml_is_contiguous(op->src[0]) && ggml_is_contiguous(op) &&
+                    (!ggml_is_quantized(src0_type) || (ggml_is_contiguous(op->src[0]) && ggml_is_contiguous(op))) &&
                      (ggml_type_size(src0_type) % 2) == 0) {
                      return true;
                  }
author	Jeff Bolz <redacted>
	Tue, 18 Nov 2025 06:41:24 +0000 (00:41 -0600)
committer	Georgi Gerganov <redacted>
	Fri, 12 Dec 2025 15:53:03 +0000 (17:53 +0200)