From: Abhilash Majumder
Date: Thu, 8 Feb 2024 17:09:10 +0000 (+0530)
Subject: Fix f16_sycl cpy call from Arc (llama/5411)
X-Git-Tag: upstream/0.0.1642~985
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=c8efd6540103a58d6ceab75c175040d601eabc9e;p=pkg%2Fggml%2Fsources%2Fggml

Fix f16_sycl cpy call from Arc (llama/5411)

* fix f16_sycl cpy call

* rm old logic

* add fp16 build CI

* use macro

* format fix
---

diff --git a/ggml-sycl.cpp b/ggml-sycl.cpp
index a03df4c6..dd562a89 100644
--- a/ggml-sycl.cpp
+++ b/ggml-sycl.cpp
@@ -12148,7 +12148,8 @@ inline void ggml_sycl_op_dequantize_mul_mat_vec(
     const int64_t src1_ncols, const int64_t src1_padded_row_size,
     const dpct::queue_ptr &stream) {
 
-    const int64_t ne00 = src0->ne[0];
+    GGML_TENSOR_BINARY_OP_LOCALS
+
     const int64_t row_diff = row_high - row_low;
 
     // on some GPUs it is faster to convert src1 to half and to use half precision intrinsics
@@ -12167,8 +12168,9 @@ inline void ggml_sycl_op_dequantize_mul_mat_vec(
         } else {
             src1_dfloat = src1_dfloat_a.alloc(ne00);
             ggml_cpy_f32_f16_sycl((const char *)src1_ddf_i, (char *)src1_dfloat,
-                                  ne00, ne00, 1, sizeof(float), 0, 0, ne00, 1,
-                                  sizeof(sycl::half), 0, 0, stream);
+                                  ne00, ne00, ne01, ne02, nb00, nb01, nb02,
+                                  nb03, ne10, ne11, ne12, nb10, nb11, nb12,
+                                  nb13, stream);
         }
     }
 #else