    const int64_t src1_ncols, const int64_t src1_padded_row_size,
    const dpct::queue_ptr &stream) {
-    const int64_t ne00 = src0->ne[0];
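+    // declares ne0*/nb0* locals for src0, ne1*/nb1* for src1 and ne*/nb* for dst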
+    GGML_TENSOR_BINARY_OP_LOCALS
+
    const int64_t row_diff = row_high - row_low;
    // on some GPUs it is faster to convert src1 to half and to use half precision intrinsics
        } else {
            src1_dfloat = src1_dfloat_a.alloc(ne00);
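+            // f32 -> f16 copy of one src1 row, passing the tensor extents and byte
+            // strides from GGML_TENSOR_BINARY_OP_LOCALS instead of hard-coded
+            // contiguous values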
            ggml_cpy_f32_f16_sycl((const char *)src1_ddf_i, (char *)src1_dfloat,
-                                  ne00, ne00, 1, sizeof(float), 0, 0, ne00, 1,
-                                  sizeof(sycl::half), 0, 0, stream);
+                                  ne00, ne00, ne01, ne02, nb00, nb01, nb02,
+                                  nb03, ne10, ne11, ne12, nb10, nb11, nb12,
+                                  nb13, stream);
        }
    }
#else