SYCL: Reduce most of the compiler warnings (llama/10748)

author Akarshan Biswas <redacted>

Fri, 13 Dec 2024 06:42:15 +0000 (12:12 +0530)

committer Georgi Gerganov <redacted>

Wed, 18 Dec 2024 10:52:16 +0000 (12:52 +0200)
author Akarshan Biswas <redacted>
Fri, 13 Dec 2024 06:42:15 +0000 (12:12 +0530)
committer Georgi Gerganov <redacted>
Wed, 18 Dec 2024 10:52:16 +0000 (12:52 +0200)
diff --git a/ggml/src/ggml-sycl/common.cpp b/ggml/src/ggml-sycl/common.cpp

index 97ab2003c7fa1201473dff617b7512b0d2cd0bef..a9ee404911460c0a4b8f40b7142b3e936509fd50 100644 (file)
--- a/ggml/src/ggml-sycl/common.cpp
+++ b/ggml/src/ggml-sycl/common.cpp
@@ -11,6 +11,7 @@
  //
  
  #include "common.hpp"
+#include "ggml-impl.h"
  
  int get_current_device_id() {
    return dpct::dev_mgr::instance().current_device_id();
@@ -28,11 +29,7 @@ void* ggml_sycl_host_malloc(size_t size) try {
  
    if (err != 0) {
      // clear the error
-    fprintf(
-        stderr,
-        "WARNING: failed to allocate %.2f MB of pinned memory: %s\n",
-        size / 1024.0 / 1024.0,
-        "syclGetErrorString is not supported");
+    GGML_LOG_ERROR("WARNING: failed to allocate %.2f MB of pinned memory: %s\n", size / 1024.0 / 1024.0,    "syclGetErrorString is not supported");
      return nullptr;
    }
  
@@ -66,18 +63,12 @@ int64_t downsample_sycl_global_range(int64_t accumulate_block_num, int64_t block
  void ggml_sycl_op_flatten(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
                                   const ggml_tensor *src1, ggml_tensor *dst,
                                   const ggml_sycl_op_flatten_t op) try {
-    const int64_t nrows0 = ggml_nrows(src0);
  
      const bool use_src1 = src1 != nullptr;
-    const int64_t nrows1 = use_src1 ? ggml_nrows(src1) : 1;
  
      GGML_ASSERT(!use_src1 || src1->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
      GGML_ASSERT(              dst->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
  
-    ggml_tensor_extra_gpu * src0_extra =            (ggml_tensor_extra_gpu *) src0->extra;
-    ggml_tensor_extra_gpu * src1_extra = use_src1 ? (ggml_tensor_extra_gpu *) src1->extra : nullptr;
-    ggml_tensor_extra_gpu * dst_extra  =            (ggml_tensor_extra_gpu *)  dst->extra;
-
      // dd = data device
      float * src0_ddf = (float *) src0->data;
      float * src1_ddf = use_src1 ? (float *) src1->data : nullptr;
diff --git a/ggml/src/ggml-sycl/common.hpp b/ggml/src/ggml-sycl/common.hpp

index 4549fa5e95a098b7f412b5e875e93a0fbd2e475f..c1582f610e5f4bb7f5427f7dd5392ca7b9915af2 100644 (file)
--- a/ggml/src/ggml-sycl/common.hpp
+++ b/ggml/src/ggml-sycl/common.hpp
@@ -626,6 +626,7 @@ struct bin_bcast_sycl {
                      });
              }
          }
+        GGML_UNUSED(ctx);
      }
  };
  
diff --git a/ggml/src/ggml-sycl/concat.cpp b/ggml/src/ggml-sycl/concat.cpp

index c90c452d8783fdfb53a1b1a015eed578792c3beb..a240968ad2e4884d1470fe3e2cbd0d4d403dadca 100644 (file)
--- a/ggml/src/ggml-sycl/concat.cpp
+++ b/ggml/src/ggml-sycl/concat.cpp
@@ -47,7 +47,7 @@ static void concat_f32_dim1(const float *x, const float *y, float *dst,
    // operation
    int offset_dst = nidx + item_ct1.get_group(1) * ne0 +
                     item_ct1.get_group(0) * ne0 * item_ct1.get_group_range(1);
-  if (item_ct1.get_group(1) < ne01) { // src0
+  if (item_ct1.get_group(1) < (size_t) ne01) { // src0
      int offset_src =
          nidx + item_ct1.get_group(1) * ne0 + item_ct1.get_group(0) * ne0 * ne01;
      dst[offset_dst] = x[offset_src];
@@ -70,7 +70,7 @@ static void concat_f32_dim2(const float *x, const float *y, float *dst,
    // operation
    int offset_dst = nidx + item_ct1.get_group(1) * ne0 +
                     item_ct1.get_group(0) * ne0 * item_ct1.get_group_range(1);
-  if (item_ct1.get_group(0) < ne02) { // src0
+  if (item_ct1.get_group(0) < (size_t) ne02) { // src0
      int offset_src = nidx + item_ct1.get_group(1) * ne0 +
                       item_ct1.get_group(0) * ne0 * item_ct1.get_group_range(1);
      dst[offset_dst] = x[offset_src];
diff --git a/ggml/src/ggml-sycl/convert.cpp b/ggml/src/ggml-sycl/convert.cpp

index 5fd15e6cdccabb277eff3b3b3171cbce976712c8..05b01db2d8b05e2a1eee390e70e654750ee0865f 100644 (file)
--- a/ggml/src/ggml-sycl/convert.cpp
+++ b/ggml/src/ggml-sycl/convert.cpp
@@ -424,7 +424,7 @@ static void convert_unary(const void * __restrict__ vx, dst_t * __restrict__ y,
      const int64_t global_id = item_ct1.get_local_id(2) + work_group_size * item_ct1.get_group(2);
  
      // make each work-item deal with more elements since sycl global range can not exceed max int
-    const src_t * x = (src_t *) vx;
+    const src_t * x = (const src_t *) vx;
      for (int64_t i = global_id; i < k; i += work_group_size * item_ct1.get_group_range(2)) {
          y[i] = x[i];
      }
diff --git a/ggml/src/ggml-sycl/dmmv.cpp b/ggml/src/ggml-sycl/dmmv.cpp

index 0c3dfaa37eb02df2f5bb942ad194d2533abaa331..0d097357ce79be724d681fadd3b060b326e7e2d7 100644 (file)
--- a/ggml/src/ggml-sycl/dmmv.cpp
+++ b/ggml/src/ggml-sycl/dmmv.cpp
@@ -1015,9 +1015,9 @@ void ggml_sycl_op_dequantize_mul_mat_vec(
              break;
      }
  
-    (void) src1;
-    (void) dst;
-    (void) src1_ddq_i;
-    (void) src1_ncols;
-    (void) src1_padded_row_size;
+    GGML_UNUSED(src1);
+    GGML_UNUSED(dst);
+    GGML_UNUSED(src1_ddq_i);
+    GGML_UNUSED(src1_ncols);
+    GGML_UNUSED(src1_padded_row_size);
  }
diff --git a/ggml/src/ggml-sycl/dpct/helper.hpp b/ggml/src/ggml-sycl/dpct/helper.hpp

index d1b5dd87c69222d359989af120adbf4de2e001c9..e167948e7a3f0cdf75ba60b57b66299931cbd5dc 100644 (file)
--- a/ggml/src/ggml-sycl/dpct/helper.hpp
+++ b/ggml/src/ggml-sycl/dpct/helper.hpp
@@ -1237,7 +1237,7 @@ namespace dpct
  
              std::map<byte_t *, allocation>::iterator get_map_iterator(const void *ptr)
              {
-                auto it = m_map.upper_bound((byte_t *)ptr);
+                auto it = m_map.upper_bound(const_cast<byte_t *>(reinterpret_cast<const byte_t *>(ptr)));
                  if (it == m_map.end())
                  {
                      // Not a virtual pointer.
diff --git a/ggml/src/ggml-sycl/element_wise.cpp b/ggml/src/ggml-sycl/element_wise.cpp

index e5cd736eba9890aa81e0632618604856240eec9c..d05a51f807c208c29f84b8e0dee70dc26e94de24 100644 (file)
--- a/ggml/src/ggml-sycl/element_wise.cpp
+++ b/ggml/src/ggml-sycl/element_wise.cpp
@@ -237,7 +237,7 @@ void upscale_f32(const float  *x, float *dst, const int nb00, const int nb01,
      int i02 = i12 / sf2;
      int i03 = i13 / sf3;
  
-    dst[index] = *(float *)((char *)x + i03 * nb03 + i02 * nb02 + i01 * nb01 + i00 * nb00);
+    dst[index] = *(const float *)((const char *)x + i03 * nb03 + i02 * nb02 + i01 * nb01 + i00 * nb00);
  }
  
  void pad_f32(const float  *x, float *dst, const int ne0, const int ne00, const int ne01, const int ne02,
@@ -251,8 +251,7 @@ void pad_f32(const float  *x, float *dst, const int ne0, const int ne00, const i
      // operation
      int offset_dst = nidx + item_ct1.get_group(1) * ne0 +
                       item_ct1.get_group(0) * ne0 * item_ct1.get_group_range(1);
-    if (nidx < ne00 && item_ct1.get_group(1) < ne01 &&
-        item_ct1.get_group(0) < ne02) {
+    if (nidx < ne00 && item_ct1.get_group(1) < (size_t) ne01 && item_ct1.get_group(0) < (size_t) ne02) {
          int offset_src = nidx + item_ct1.get_group(1) * ne00 +
                           item_ct1.get_group(0) * ne00 * ne01;
              dst[offset_dst] = x[offset_src];
@@ -520,9 +519,10 @@ inline void ggml_sycl_op_silu(ggml_backend_sycl_context & ctx, const ggml_tensor
  
      silu_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
  
-    (void) src1;
-    (void) dst;
-    (void) src1_dd;
+    GGML_UNUSED(src1);
+    GGML_UNUSED(dst);
+    GGML_UNUSED(src1_dd);
+    GGML_UNUSED(ctx);
  }
  
  inline void ggml_sycl_op_gelu(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
@@ -535,9 +535,10 @@ inline void ggml_sycl_op_gelu(ggml_backend_sycl_context & ctx, const ggml_tensor
  
      gelu_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
  
-    (void) src1;
-    (void) dst;
-    (void) src1_dd;
+    GGML_UNUSED(src1);
+    GGML_UNUSED(dst);
+    GGML_UNUSED(src1_dd);
+    GGML_UNUSED(ctx);
  }
  inline void ggml_sycl_op_gelu_quick(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
                                      const ggml_tensor *src1, ggml_tensor *dst,
@@ -550,9 +551,10 @@ inline void ggml_sycl_op_gelu_quick(ggml_backend_sycl_context & ctx, const ggml_
  
      gelu_quick_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
  
-    (void) src1;
-    (void) dst;
-    (void) src1_dd;
+    GGML_UNUSED(src1);
+    GGML_UNUSED(dst);
+    GGML_UNUSED(src1_dd);
+    GGML_UNUSED(ctx);
  }
  
  inline void ggml_sycl_op_tanh(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
@@ -564,9 +566,10 @@ inline void ggml_sycl_op_tanh(ggml_backend_sycl_context & ctx, const ggml_tensor
      GGML_ASSERT( dst->type == GGML_TYPE_F32);
      tanh_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
  
-    (void) src1;
-    (void) dst;
-    (void) src1_dd;
+    GGML_UNUSED(src1);
+    GGML_UNUSED(dst);
+    GGML_UNUSED(src1_dd);
+    GGML_UNUSED(ctx);
  }
  
  inline void ggml_sycl_op_relu(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
@@ -579,9 +582,10 @@ inline void ggml_sycl_op_relu(ggml_backend_sycl_context & ctx, const ggml_tensor
  
      relu_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
  
-    (void) src1;
-    (void) dst;
-    (void) src1_dd;
+    GGML_UNUSED(src1);
+    GGML_UNUSED(dst);
+    GGML_UNUSED(src1_dd);
+    GGML_UNUSED(ctx);
  }
  
  inline void ggml_sycl_op_hardsigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
@@ -595,9 +599,10 @@ inline void ggml_sycl_op_hardsigmoid(ggml_backend_sycl_context & ctx, const ggml
  
      hardsigmoid_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
  
-    (void) src1;
-    (void) dst;
-    (void) src1_dd;
+    GGML_UNUSED(src1);
+    GGML_UNUSED(dst);
+    GGML_UNUSED(src1_dd);
+    GGML_UNUSED(ctx);
  }
  
  inline void ggml_sycl_op_hardswish(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
@@ -610,9 +615,10 @@ inline void ggml_sycl_op_hardswish(ggml_backend_sycl_context & ctx, const ggml_t
  
      hardswish_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
  
-    (void) src1;
-    (void) dst;
-    (void) src1_dd;
+    GGML_UNUSED(src1);
+    GGML_UNUSED(dst);
+    GGML_UNUSED(src1_dd);
+    GGML_UNUSED(ctx);
  }
  
  inline void ggml_sycl_op_exp(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
@@ -625,9 +631,10 @@ inline void ggml_sycl_op_exp(ggml_backend_sycl_context & ctx, const ggml_tensor
  
      exp_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
  
-    (void) src1;
-    (void) dst;
-    (void) src1_dd;
+    GGML_UNUSED(src1);
+    GGML_UNUSED(dst);
+    GGML_UNUSED(src1_dd);
+    GGML_UNUSED(ctx);
  }
  
  inline void ggml_sycl_op_log(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
@@ -640,9 +647,10 @@ inline void ggml_sycl_op_log(ggml_backend_sycl_context & ctx, const ggml_tensor
  
      log_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
  
-    (void) src1;
-    (void) dst;
-    (void) src1_dd;
+    GGML_UNUSED(src1);
+    GGML_UNUSED(dst);
+    GGML_UNUSED(src1_dd);
+    GGML_UNUSED(ctx);
  }
  
  inline void ggml_sycl_op_sigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
@@ -655,9 +663,10 @@ inline void ggml_sycl_op_sigmoid(ggml_backend_sycl_context & ctx, const ggml_ten
  
      sigmoid_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
  
-    (void) src1;
-    (void) dst;
-    (void) src1_dd;
+    GGML_UNUSED(src1);
+    GGML_UNUSED(dst);
+    GGML_UNUSED(src1_dd);
+    GGML_UNUSED(ctx);
  }
  
  inline void ggml_sycl_op_sqrt(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
@@ -670,9 +679,10 @@ inline void ggml_sycl_op_sqrt(ggml_backend_sycl_context & ctx, const ggml_tensor
  
      sqrt_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
  
-    (void) src1;
-    (void) dst;
-    (void) src1_dd;
+    GGML_UNUSED(src1);
+    GGML_UNUSED(dst);
+    GGML_UNUSED(src1_dd);
+    GGML_UNUSED(ctx);
  }
  
  inline void ggml_sycl_op_sin(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
@@ -685,9 +695,10 @@ inline void ggml_sycl_op_sin(ggml_backend_sycl_context & ctx, const ggml_tensor
  
      sin_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
  
-    (void) src1;
-    (void) dst;
-    (void) src1_dd;
+    GGML_UNUSED(src1);
+    GGML_UNUSED(dst);
+    GGML_UNUSED(src1_dd);
+    GGML_UNUSED(ctx);
  }
  
  inline void ggml_sycl_op_cos(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
@@ -700,9 +711,10 @@ inline void ggml_sycl_op_cos(ggml_backend_sycl_context & ctx, const ggml_tensor
  
      cos_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
  
-    (void) src1;
-    (void) dst;
-    (void) src1_dd;
+    GGML_UNUSED(src1);
+    GGML_UNUSED(dst);
+    GGML_UNUSED(src1_dd);
+    GGML_UNUSED(ctx);
  }
  
  inline void ggml_sycl_op_step(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
@@ -715,9 +727,10 @@ inline void ggml_sycl_op_step(ggml_backend_sycl_context & ctx, const ggml_tensor
  
      step_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
  
-    (void) src1;
-    (void) dst;
-    (void) src1_dd;
+    GGML_UNUSED(src1);
+    GGML_UNUSED(dst);
+    GGML_UNUSED(src1_dd);
+    GGML_UNUSED(ctx);
  }
  
  inline void ggml_sycl_op_neg(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
@@ -730,9 +743,10 @@ inline void ggml_sycl_op_neg(ggml_backend_sycl_context & ctx, const ggml_tensor
  
      neg_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
  
-    (void) src1;
-    (void) dst;
-    (void) src1_dd;
+    GGML_UNUSED(src1);
+    GGML_UNUSED(dst);
+    GGML_UNUSED(src1_dd);
+    GGML_UNUSED(ctx);
  }
  
  inline void ggml_sycl_op_leaky_relu(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
@@ -749,9 +763,10 @@ inline void ggml_sycl_op_leaky_relu(ggml_backend_sycl_context & ctx, const ggml_
  
      leaky_relu_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), negative_slope, main_stream);
  
-    (void) src1;
-    (void) dst;
-    (void) src1_dd;
+    GGML_UNUSED(src1);
+    GGML_UNUSED(dst);
+    GGML_UNUSED(src1_dd);
+    GGML_UNUSED(ctx);
  }
  
  inline void ggml_sycl_op_sqr(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
@@ -764,9 +779,10 @@ inline void ggml_sycl_op_sqr(ggml_backend_sycl_context & ctx, const ggml_tensor
  
      sqr_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
  
-    (void) src1;
-    (void) dst;
-    (void) src1_dd;
+    GGML_UNUSED(src1);
+    GGML_UNUSED(dst);
+    GGML_UNUSED(src1_dd);
+    GGML_UNUSED(ctx);
  }
  
  inline void ggml_sycl_op_upscale(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
@@ -787,9 +803,10 @@ inline void ggml_sycl_op_upscale(ggml_backend_sycl_context & ctx, const ggml_ten
                       dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3], sf0, sf1, sf2, sf3,
                       main_stream);
  
-    (void) src1;
-    (void) dst;
-    (void) src1_dd;
+    GGML_UNUSED(src1);
+    GGML_UNUSED(dst);
+    GGML_UNUSED(src1_dd);
+    GGML_UNUSED(ctx);
  }
  
  inline void ggml_sycl_op_pad(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
@@ -805,9 +822,10 @@ inline void ggml_sycl_op_pad(ggml_backend_sycl_context & ctx, const ggml_tensor
          src0->ne[0], src0->ne[1], src0->ne[2],
          dst->ne[0], dst->ne[1], dst->ne[2], main_stream);
  
-    (void) src1;
-    (void) dst;
-    (void) src1_dd;
+    GGML_UNUSED(src1);
+    GGML_UNUSED(dst);
+    GGML_UNUSED(src1_dd);
+    GGML_UNUSED(ctx);
  }
  
  inline void ggml_sycl_op_acc(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
@@ -827,7 +845,8 @@ inline void ggml_sycl_op_acc(ggml_backend_sycl_context & ctx, const ggml_tensor
  
      acc_f32_sycl(src0_dd, src1_dd, dst_dd, ggml_nelements(dst), src1->ne[0], src1->ne[1], src1->ne[2], nb1, nb2, offset, main_stream);
  
-    (void) dst;
+    GGML_UNUSED(dst);
+    GGML_UNUSED(ctx);
  }
  
  inline void ggml_sycl_op_add(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
diff --git a/ggml/src/ggml-sycl/gemm.hpp b/ggml/src/ggml-sycl/gemm.hpp

index 2ad9b36f419ce628aa260ce682920a5ccd51b43b..3f0f34ad603f59fe9aa10b8905045114a1cfe249 100644 (file)
--- a/ggml/src/ggml-sycl/gemm.hpp
+++ b/ggml/src/ggml-sycl/gemm.hpp
@@ -51,8 +51,8 @@ public:
          const auto a_in_md = dnnl::memory::desc(a_dims, at, a_trans ? tag::ba : tag::ab);
          const auto b_in_md = dnnl::memory::desc(b_dims, bt, b_trans ? tag::ba : tag::ab);
          const auto c_md = dnnl::memory::desc(c_dims, ct, tag::ab);
-        auto a_mem = dnnl::memory(a_in_md, eng, (void*)a);
-        auto b_mem = dnnl::memory(b_in_md, eng, (void*)b);
+        auto a_mem = dnnl::memory(a_in_md, eng, const_cast<void*>(a));
+        auto b_mem = dnnl::memory(b_in_md, eng, const_cast<void*>(b));
          auto matmul_pd = dnnl::matmul::primitive_desc(eng, a_in_md, b_in_md, c_md);
          auto c_mem = dnnl::memory(matmul_pd.dst_desc(), eng, c);
  
@@ -79,8 +79,8 @@ public:
          const auto a_in_md = dnnl::memory::desc(a_dims, at, a_trans ? tag::ba : tag::ab);
          const auto b_in_md = dnnl::memory::desc(b_dims, bt, b_trans ? tag::ba : tag::ab);
          const auto c_md = dnnl::memory::desc(c_dims, ct, tag::ab);
-        auto a_mem = dnnl::memory(a_in_md, eng, (void*)a);
-        auto b_mem = dnnl::memory(b_in_md, eng, (void*)b);
+        auto a_mem = dnnl::memory(a_in_md, eng, const_cast<void*>(a));
+        auto b_mem = dnnl::memory(b_in_md, eng, const_cast<void*>(b));
          auto matmul_pd = dnnl::matmul::primitive_desc(eng, a_in_md, b_in_md, c_md);
          auto c_mem = dnnl::memory(matmul_pd.dst_desc(), eng, c);
  
diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp

index ae3baedc7b6c368e02692c9f773e2304a7ebba98..6b9f0b0d9a1c8726f186d7b25a8be5d6a26b0dd4 100644 (file)
--- a/ggml/src/ggml-sycl/ggml-sycl.cpp
+++ b/ggml/src/ggml-sycl/ggml-sycl.cpp
@@ -47,7 +47,7 @@ static ggml_sycl_device_info ggml_sycl_init() {
  
      info.device_count = dpct::dev_mgr::instance().device_count();
      if (info.device_count == 0) {
-        GGML_LOG_ERROR("%s: failed to initialize " GGML_SYCL_NAME ": %s\n", __func__);
+        GGML_LOG_ERROR("%s: failed to initialize: %s\n", GGML_SYCL_NAME, __func__);
          return info;
      }
  
@@ -64,7 +64,7 @@ static ggml_sycl_device_info ggml_sycl_init() {
  #else
      GGML_LOG_INFO("%s: SYCL_USE_XMX: no\n", __func__);
  #endif
-    GGML_LOG_INFO("%s: found %d " GGML_SYCL_NAME " devices:\n", __func__, info.device_count);
+    GGML_LOG_INFO("%s: found %d %s devices:\n", __func__, info.device_count, GGML_SYCL_NAME);
  
      for (int i = 0; i < info.device_count; ++i) {
          info.devices[i].vmm = 0;
@@ -137,7 +137,6 @@ void ggml_backend_sycl_print_sycl_devices() {
  
      for (int id = 0; id < device_count; ++id) {
        sycl::device device = dpct::dev_mgr::instance().get_device(id);
-      sycl::backend backend = device.get_backend();
        std::string backend_type = get_device_backend_and_type(device);
        int type_id = DeviceNums[backend_type]++;
        std::stringstream device_type;
@@ -420,13 +419,11 @@ ggml_backend_sycl_buffer_cpy_tensor(ggml_backend_buffer_t buffer,
          return true;
      }
      return false;
+    GGML_UNUSED(buffer);
+} catch (const sycl::exception & exc) {
+    std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl;
+    std::exit(1);
  }
-catch (sycl::exception const &exc) {
-  std::cerr << exc.what() << "Exception caught at file:" << __FILE__
-            << ", line:" << __LINE__ << std::endl;
-  std::exit(1);
-}
-
  
  static void ggml_backend_sycl_buffer_clear(ggml_backend_buffer_t buffer,
                                             uint8_t value) try {
@@ -1092,10 +1089,7 @@ struct ggml_sycl_pool_leg : public ggml_sycl_pool {
      ggml_sycl_buffer buffer_pool[MAX_SYCL_BUFFERS] = {};
      size_t pool_size = 0;
  
-    explicit ggml_sycl_pool_leg(queue_ptr qptr_, int device_) :
-        qptr(qptr_),
-        device(device_) {
-    }
+    explicit ggml_sycl_pool_leg(queue_ptr qptr_, int device_) : device(device_), qptr(qptr_) {}
  
      ~ggml_sycl_pool_leg() {
          for (int i = 0; i < MAX_SYCL_BUFFERS; ++i) {
@@ -1238,7 +1232,7 @@ static void quantize_q8_1(const float * __restrict__ x, void * __restrict__ vy,
          zeros[i] = 0.f;
          qzeros[i] = 0;
      }
-    const TC xi = ix < kx ? *(TC *)&x[iy * kx + ix] : zeros;
+    const TC xi = ix < kx ? *(const TC *)&x[iy * kx + ix] : zeros;
      float sum = xi[0];
      float amax = sycl::fabs(xi[0]);
  #pragma unroll
@@ -1799,6 +1793,9 @@ static  void pool2d_nchw_kernel(
          switch (op) {
              case GGML_OP_POOL_AVG: res = 0; break;
              case GGML_OP_POOL_MAX: res = -FLT_MAX; break;
+            default:
+                res      = (To) sycl::nan(uint32_t(0));
+                break;
          }
  
          for (int i = bh; i < eh; i += 1) {
@@ -1817,6 +1814,9 @@ static  void pool2d_nchw_kernel(
                  switch (op) {
                      case GGML_OP_POOL_AVG: res += (cur / (kh * kw)); break;
                      case GGML_OP_POOL_MAX: res = sycl::max(res, (To)cur); break;
+                    default:
+                        res = (To) sycl::nan(uint32_t(0));
+                        break;
                  }
              }
          }
@@ -1855,7 +1855,8 @@ static void get_rows_sycl(ggml_backend_sycl_context & ctx, const ggml_tensor *sr
                                   s3, nb01, nb02, nb03, s10, s11, s12, item_ct1);
                           });
  
-    (void) dst;
+    GGML_UNUSED(dst);
+    GGML_UNUSED(ctx);
  }
  
  template <typename src0_t>
@@ -1893,10 +1894,10 @@ static void get_rows_sycl_float(ggml_backend_sycl_context & ctx, const ggml_tens
              });
      }
  
-    (void) dst;
+    GGML_UNUSED(dst);
+    GGML_UNUSED(ctx);
  }
  
-
  static void quantize_row_q8_1_sycl(const float *x, void *vy, const int kx,
                                     const int ky, const int kx_padded,
                                     queue_ptr stream) {
@@ -2464,8 +2465,8 @@ static void ggml_sycl_op_repeat(ggml_backend_sycl_context & ctx, const ggml_tens
  
      ggml_sycl_op_bin_bcast<bin_bcast_sycl<op_repeat>>(ctx, dst, src0, dst, nullptr, src0_d, dst_d, main_stream);
  
-    (void) src1;
-    (void) src1_d;
+    GGML_UNUSED(src1);
+    GGML_UNUSED(src1_d);
  }
  
  
@@ -2484,17 +2485,18 @@ inline void ggml_sycl_op_mul_mat_sycl(
      const int64_t ne00 = src0->ne[0];
      const int64_t ne10 = src1->ne[0];
  
-    const int64_t ne0 = dst->ne[0];
  
      const int64_t row_diff = row_high - row_low;
  
      int id;
      SYCL_CHECK(
          CHECK_TRY_ERROR(id = get_current_device_id()));
-
+#if !GGML_SYCL_DNNL
+    const int64_t ne0 = dst->ne[0];
      // the main device has a larger memory buffer to hold the results from all GPUs
      // ldc == nrows of the matrix that cuBLAS writes into
      int ldc = id == ctx.device ? ne0 : row_diff;
+#endif
  
  #ifdef GGML_SYCL_F16
      bool use_fp16 = true;  // TODO(Yu) SYCL capability check
@@ -2531,9 +2533,9 @@ inline void ggml_sycl_op_mul_mat_sycl(
                                           : src1_as_f16.get();
          ggml_sycl_pool_alloc<sycl::half> dst_f16(ctx.pool(), row_diff * src1_ncols);
  
-        const sycl::half alpha_f16 = 1.0f;
-        const sycl::half beta_f16 = 0.0f;
  #if !GGML_SYCL_DNNL
+        const sycl::half alpha_f16 = 1.0f;
+        const sycl::half beta_f16  = 0.0f;
          SYCL_CHECK(CHECK_TRY_ERROR(dpct::gemm(
              *stream, oneapi::mkl::transpose::trans,
              oneapi::mkl::transpose::nontrans, row_diff, src1_ncols, ne10,
@@ -2570,9 +2572,9 @@ inline void ggml_sycl_op_mul_mat_sycl(
          const float * src0_ddf_i = src0->type == GGML_TYPE_F32 ? (const float *) src0_dd_i : src0_ddq_as_f32.get();
          const float * src1_ddf1_i = src1->type == GGML_TYPE_F32 ? (const float *) src1_ddf_i : src1_ddq_as_f32.get();
  
-        const float alpha = 1.0f;
-        const float beta = 0.0f;
  #if !GGML_SYCL_DNNL
+        const float alpha = 1.0f;
+        const float beta  = 0.0f;
  #    ifdef GGML_SYCL_NVIDIA
          SYCL_CHECK(CHECK_TRY_ERROR(oneapi::mkl::blas::column_major::gemm(
              oneapi::mkl::backend_selector<oneapi::mkl::backend::cublas>{ *stream }, oneapi::mkl::transpose::trans,
@@ -2590,9 +2592,9 @@ inline void ggml_sycl_op_mul_mat_sycl(
              src0_ddf_i, DnnlGemmWrapper::to_dt<float>(), dst_dd_i, DnnlGemmWrapper::to_dt<float>());
  #endif
      }
-    (void) dst;
-    (void) src1_ddq_i;
-    (void) src1_padded_row_size;
+    GGML_UNUSED(dst);
+    GGML_UNUSED(src1_ddq_i);
+    GGML_UNUSED(src1_padded_row_size);
  }
  catch (sycl::exception const &exc) {
    std::cerr << exc.what() << "Exception caught at file:" << __FILE__
@@ -2638,8 +2640,9 @@ static void ggml_sycl_op_pool2d(ggml_backend_sycl_context & ctx, const ggml_tens
                                 item_ct1);
          });
  
-    (void) src1;
-    (void) src1_dd;
+    GGML_UNUSED(src1);
+    GGML_UNUSED(src1_dd);
+    GGML_UNUSED(ctx);
  }
  
  inline void ggml_sycl_op_sum(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
@@ -2654,9 +2657,10 @@ inline void ggml_sycl_op_sum(ggml_backend_sycl_context & ctx, const ggml_tensor
  
      sum_rows_f32_sycl(src0_dd, dst_dd, ne, 1, main_stream);
  
-    (void) src1;
-    (void) dst;
-    (void) src1_dd;
+    GGML_UNUSED(src1);
+    GGML_UNUSED(dst);
+    GGML_UNUSED(src1_dd);
+    GGML_UNUSED(ctx);
  }
  
  inline void ggml_sycl_op_sum_rows(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
@@ -2673,9 +2677,10 @@ inline void ggml_sycl_op_sum_rows(ggml_backend_sycl_context & ctx, const ggml_te
  
      sum_rows_f32_sycl(src0_dd, dst_dd, ncols, nrows, main_stream);
  
-    (void) src1;
-    (void) dst;
-    (void) src1_dd;
+    GGML_UNUSED(src1);
+    GGML_UNUSED(dst);
+    GGML_UNUSED(src1_dd);
+    GGML_UNUSED(ctx);
  }
  
  inline void ggml_sycl_op_argsort(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
@@ -2694,9 +2699,10 @@ inline void ggml_sycl_op_argsort(ggml_backend_sycl_context & ctx, const ggml_ten
  
      argsort_f32_i32_sycl(src0_dd, (int *)dst_dd, ncols, nrows, order, main_stream);
  
-    (void) src1;
-    (void) dst;
-    (void) src1_dd;
+    GGML_UNUSED(src1);
+    GGML_UNUSED(dst);
+    GGML_UNUSED(src1_dd);
+    GGML_UNUSED(ctx);
  }
  
  inline void ggml_sycl_op_argmax(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
@@ -2713,9 +2719,10 @@ inline void ggml_sycl_op_argmax(ggml_backend_sycl_context & ctx, const ggml_tens
  
      argmax_f32_i32_sycl(src0_dd, (int *)dst_dd, ncols, nrows, main_stream);
  
-    (void) src1;
-    (void) dst;
-    (void) src1_dd;
+    GGML_UNUSED(src1);
+    GGML_UNUSED(dst);
+    GGML_UNUSED(src1_dd);
+    GGML_UNUSED(ctx);
  }
  
  inline void ggml_sycl_op_diag_mask_inf(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
@@ -2735,9 +2742,10 @@ inline void ggml_sycl_op_diag_mask_inf(ggml_backend_sycl_context & ctx, const gg
  
      diag_mask_inf_f32_sycl(src0_dd, dst_dd, ne00, nrows0, ne01, n_past, main_stream);
  
-    (void) src1;
-    (void) dst;
-    (void) src1_dd;
+    GGML_UNUSED(src1);
+    GGML_UNUSED(dst);
+    GGML_UNUSED(src1_dd);
+    GGML_UNUSED(ctx);
  }
  
  inline void ggml_sycl_op_scale(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
@@ -2758,9 +2766,10 @@ inline void ggml_sycl_op_scale(ggml_backend_sycl_context & ctx, const ggml_tenso
      */
      SYCL_CHECK(0);
  
-    (void) src1;
-    (void) dst;
-    (void) src1_dd;
+    GGML_UNUSED(src1);
+    GGML_UNUSED(dst);
+    GGML_UNUSED(src1_dd);
+    GGML_UNUSED(ctx);
  }
  
  inline void ggml_sycl_op_clamp(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
@@ -2783,9 +2792,10 @@ inline void ggml_sycl_op_clamp(ggml_backend_sycl_context & ctx, const ggml_tenso
      */
      SYCL_CHECK(0);
  
-    (void) src1;
-    (void) dst;
-    (void) src1_dd;
+    GGML_UNUSED(src1);
+    GGML_UNUSED(dst);
+    GGML_UNUSED(src1_dd);
+    GGML_UNUSED(ctx);
  }
  
  static void ggml_sycl_set_peer_access(const int n_tokens, int main_device) {
@@ -2862,7 +2872,6 @@ static void ggml_sycl_op_mul_mat(ggml_backend_sycl_context & ctx, const ggml_ten
  
      ggml_tensor_extra_gpu * src0_extra = (ggml_tensor_extra_gpu *) src0->extra;
      ggml_tensor_extra_gpu * src1_extra = (ggml_tensor_extra_gpu *) src1->extra;
-    ggml_tensor_extra_gpu *  dst_extra = (ggml_tensor_extra_gpu *)  dst->extra;
  
      const bool src0_is_contiguous = ggml_is_contiguous(src0);
      const bool src1_is_contiguous = ggml_is_contiguous(src1);
@@ -3289,7 +3298,6 @@ static void ggml_sycl_mul_mat_batched_sycl(ggml_backend_sycl_context & ctx,
  
      GGML_TENSOR_BINARY_OP_LOCALS
  
-    const int64_t ne_dst = ggml_nelements(dst);
  
      SYCL_CHECK(ggml_sycl_set_device(ctx.device));
      queue_ptr main_stream = ctx.stream();;
@@ -3397,6 +3405,7 @@ catch (sycl::exception const &exc) {
  
  inline bool ggml_sycl_supports_mmq(enum ggml_type type) {
      // TODO: accuracy issues in MMQ
+    GGML_UNUSED(type);
      return false;
  }
  
@@ -3772,7 +3781,7 @@ static void ggml_sycl_cpy(ggml_backend_sycl_context & ctx, const ggml_tensor *sr
          GGML_ABORT("fatal error");
      }
  
-    (void) dst;
+    GGML_UNUSED(dst);
  }
  catch (sycl::exception const &exc) {
    std::cerr << exc.what() << "Exception caught at file:" << __FILE__
@@ -3783,7 +3792,7 @@ catch (sycl::exception const &exc) {
  static void ggml_sycl_dup(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
      // TODO: why do we pass dst as src1 here?
      ggml_sycl_cpy(ctx, src0, dst, nullptr);
-    (void) src1;
+    GGML_UNUSED(src1);
  }
  
  static void ggml_sycl_diag_mask_inf(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
@@ -3828,13 +3837,16 @@ static void ggml_sycl_argmax(ggml_backend_sycl_context & ctx, const ggml_tensor
  }
  
  static void ggml_sycl_nop(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    (void) src0;
-    (void) src1;
-    (void) dst;
+    GGML_UNUSED(src0);
+    GGML_UNUSED(src1);
+    GGML_UNUSED(dst);
+    GGML_UNUSED(ctx);
  }
  
  void ggml_sycl_set_main_device(const int main_device) try {
-    if (dpct::get_current_device_id() == main_device) return;
+    if (dpct::get_current_device_id() == static_cast<unsigned int> (main_device)) {
+        return;
+    }
      check_allow_gpu_index(main_device);
      dpct::select_device(main_device);
  
@@ -4202,6 +4214,7 @@ try
  {
      ggml_backend_sycl_context *sycl_ctx =
          (ggml_backend_sycl_context *)backend->context;
+
      sycl::event *sycl_event = static_cast<sycl::event *>(event->context);
  
      const queue_ptr &stream = sycl_ctx->stream(sycl_ctx->device, 0);
@@ -4216,7 +4229,7 @@ catch (sycl::exception const &exc)
  }
  
  static void ggml_backend_sycl_event_wait(ggml_backend_t backend, ggml_backend_event_t event) try {
-    ggml_backend_sycl_context* sycl_ctx = static_cast<ggml_backend_sycl_context*>(backend->context);
+
      sycl::event* sycl_event = static_cast<sycl::event*>(event->context);
  
      if (ggml_backend_is_sycl(backend)) {
@@ -4624,6 +4637,7 @@ static void *ggml_backend_sycl_reg_get_proc_address(ggml_backend_reg_t reg, cons
      // SYCL doesn't support registering host memory, left here for reference
      // "ggml_backend_register_host_buffer"
      // "ggml_backend_unregister_host_buffer"
+    GGML_UNUSED(name);
      return nullptr;
  }
  
diff --git a/ggml/src/ggml-sycl/im2col.cpp b/ggml/src/ggml-sycl/im2col.cpp

index 6a0a0fcd08c68a32b1e2722751a409c90825accd..6146a99edbe7742e81cb9d12e99afd3ab6d1cf93 100644 (file)
--- a/ggml/src/ggml-sycl/im2col.cpp
+++ b/ggml/src/ggml-sycl/im2col.cpp
@@ -120,6 +120,7 @@ void ggml_sycl_op_im2col(
          im2col_sycl(src1_dd, (float *)dst_dd, IW, IH, OW, OH, KW, KH, IC, batch, batch_offset, delta_offset, s0, s1, p0, p1, d0, d1, main_stream);
      }
  
-    (void) src0;
-    (void) src0_dd;
+    GGML_UNUSED(src0);
+    GGML_UNUSED(src0_dd);
+    GGML_UNUSED(ctx);
  }
diff --git a/ggml/src/ggml-sycl/mmq.cpp b/ggml/src/ggml-sycl/mmq.cpp

index e952533d310ec8dbd4d9ca86e152272169d2bab2..8ea82c940c788190220008a82eaf22856d316ae2 100644 (file)
--- a/ggml/src/ggml-sycl/mmq.cpp
+++ b/ggml/src/ggml-sycl/mmq.cpp
@@ -813,7 +813,7 @@ load_tiles_q4_K(const void *__restrict__ vx, int *__restrict__ x_ql,
          x_ql[i * (WARP_SIZE + 1) + k] = get_int_from_uint8_aligned(bxi->qs, kqsx);
      }
  
-    const int blocks_per_tile_x_row = WARP_SIZE / QI4_K; // == 1 if QK_K == 256
+    constexpr int blocks_per_tile_x_row = QI4_K > WARP_SIZE ? 1 : WARP_SIZE / QI4_K; // == 1 if QK_K == 256
      const int kbxd = k % blocks_per_tile_x_row;          // == 0 if QK_K == 256
  
  #pragma unroll
@@ -961,7 +961,7 @@ load_tiles_q5_K(const void *__restrict__ vx, int *__restrict__ x_ql,
          x_ql[i * (2*WARP_SIZE + 1) + kq1] = ql1 | qh1;
      }
  
-    const int blocks_per_tile_x_row = WARP_SIZE / QI5_K; // == 1 if QK_K == 256
+    constexpr int blocks_per_tile_x_row = QI5_K > WARP_SIZE ? 1 : WARP_SIZE / QI5_K; // == 1 if QK_K == 256
      const int kbxd = k % blocks_per_tile_x_row;          // == 0 if QK_K == 256
  
  #pragma unroll
@@ -1109,7 +1109,7 @@ load_tiles_q6_K(const void *__restrict__ vx, int *__restrict__ x_ql,
                                                   dpct::sub_sat());
      }
  
-    const int blocks_per_tile_x_row = WARP_SIZE / QI6_K; // == 1 if QK_K == 256
+    constexpr int blocks_per_tile_x_row = QI6_K > WARP_SIZE ? 1 : WARP_SIZE / QI6_K; // == 1 if QK_K == 256
      const int kbxd = k % blocks_per_tile_x_row;          // == 0 if QK_K == 256
      float * x_dmf = (float *) x_dm;
  
@@ -3020,9 +3020,9 @@ void ggml_sycl_op_mul_mat_q(
              break;
      }
  
-    (void) src1;
-    (void) dst;
-    (void) src1_ddf_i;
+    GGML_UNUSED(src1);
+    GGML_UNUSED(dst);
+    GGML_UNUSED(src1_ddf_i);
  }
  catch (sycl::exception const &exc) {
    std::cerr << exc.what() << "Exception caught at file:" << __FILE__
diff --git a/ggml/src/ggml-sycl/mmvq.cpp b/ggml/src/ggml-sycl/mmvq.cpp

index 7b10cf688148e0152d6cbbae843f50cb93431f6d..221f65c21ea36dd7656e31187bdd30f955cdb907 100644 (file)
--- a/ggml/src/ggml-sycl/mmvq.cpp
+++ b/ggml/src/ggml-sycl/mmvq.cpp
@@ -753,11 +753,7 @@ static void mul_mat_vec_iq2_xs_q8_1_sycl(const void *vx, const void *vy,
      const sycl::range<3> block_nums(1, 1, block_num_y);
      const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, QK_WARP_SIZE);
      {
-
-        stream->submit([&](sycl::handler &cgh) {
-            auto iq2xs_grid_ptr_ct1 = &iq2xs_grid[0];
-            auto ksigns64_ptr_ct1 = &ksigns64[0];
-
+        stream->submit([&](sycl::handler & cgh) {
              cgh.parallel_for(
                  sycl::nd_range<3>(block_nums * block_dims, block_dims),
                  [=](sycl::nd_item<3> item_ct1)
@@ -780,9 +776,6 @@ static void mul_mat_vec_iq2_s_q8_1_sycl(const void *vx, const void *vy,
      {
  
          stream->submit([&](sycl::handler &cgh) {
-            auto iq2xs_grid_ptr_ct1 = &iq2xs_grid[0];
-            auto ksigns64_ptr_ct1 = &ksigns64[0];
-
              cgh.parallel_for(
                  sycl::nd_range<3>(block_nums * block_dims, block_dims),
                  [=](sycl::nd_item<3> item_ct1)
@@ -805,9 +798,6 @@ static void mul_mat_vec_iq3_xxs_q8_1_sycl(const void *vx, const void *vy,
      {
  
          stream->submit([&](sycl::handler &cgh) {
-            auto iq3xxs_grid_ptr_ct1 = &iq3xxs_grid[0];
-            auto ksigns64_ptr_ct1 = &ksigns64[0];
-
              cgh.parallel_for(
                  sycl::nd_range<3>(block_nums * block_dims, block_dims),
                  [=](sycl::nd_item<3> item_ct1)
@@ -830,8 +820,6 @@ static void mul_mat_vec_iq3_s_q8_1_sycl(const void *vx, const void *vy,
      {
  
          stream->submit([&](sycl::handler &cgh) {
-            auto iq3s_grid_ptr_ct1 = &iq3s_grid[0];
-
              cgh.parallel_for(
                  sycl::nd_range<3>(block_nums * block_dims, block_dims),
                  [=](sycl::nd_item<3> item_ct1)
@@ -854,9 +842,6 @@ static void mul_mat_vec_iq1_s_q8_1_sycl(const void *vx, const void *vy,
      {
  
          stream->submit([&](sycl::handler &cgh) {
-            auto iq1s_grid_ptr_ct1 = &iq1s_grid_gpu[0];
-            auto ksigns64_ptr_ct1 = &ksigns64[0];
-
              cgh.parallel_for(
                  sycl::nd_range<3>(block_nums * block_dims, block_dims),
                  [=](sycl::nd_item<3> item_ct1)
@@ -954,7 +939,7 @@ void ggml_sycl_op_mul_mat_vec_q(
      const size_t q8_1_bs = QK8_1;
      // the main device has a larger memory buffer to hold the results from all GPUs
      // nrows_dst == nrows of the matrix that the kernel writes into
-    const int64_t nrows_dst = id == ctx.device ? ne00 : row_diff;
+
      for (int i = 0; i < src1_ncols; i++)
      {
          const size_t src1_ddq_i_offset = i * src1_padded_col_size * q8_1_ts / q8_1_bs;
@@ -1023,7 +1008,8 @@ void ggml_sycl_op_mul_mat_vec_q(
              break;
          }
      }
-    (void) src1;
-    (void) dst;
-    (void) src1_ddf_i;
+    GGML_UNUSED(src1);
+    GGML_UNUSED(dst);
+    GGML_UNUSED(src1_ddf_i);
+    GGML_UNUSED(ctx);
  }
diff --git a/ggml/src/ggml-sycl/norm.cpp b/ggml/src/ggml-sycl/norm.cpp

index 72d8fdb878c8de290739f339d8e9037cd99d8d31..9cf2be15575d8ade92bb07c56a37fa221dac8c71 100644 (file)
--- a/ggml/src/ggml-sycl/norm.cpp
+++ b/ggml/src/ggml-sycl/norm.cpp
@@ -31,7 +31,7 @@ static void norm_f32(const float* x, float* dst, const int ncols, const float ep
          */
          item_ct1.barrier(sycl::access::fence_space::local_space);
          mean_var = 0.f;
-        int nreduce = nwarps / WARP_SIZE;
+        size_t nreduce = nwarps / WARP_SIZE;
          for (size_t i = 0; i < nreduce; i += 1)
          {
              mean_var += s_sum[lane_id + i * WARP_SIZE];
@@ -55,7 +55,7 @@ static void group_norm_f32(const float* x, float* dst, const int group_size, con
      const int nthreads = item_ct1.get_local_range(2);
      const int nwarps = nthreads / WARP_SIZE;
      start += item_ct1.get_local_id(2);
-    int nreduce = nwarps / WARP_SIZE;
+    size_t nreduce = nwarps / WARP_SIZE;
  
      if (end >= ne_elements) {
          end = ne_elements;
@@ -163,7 +163,7 @@ static void rms_norm_f32(const float* x, float* dst, const int ncols, const floa
          converged control flow. You may need to adjust the code.
          */
          item_ct1.barrier(sycl::access::fence_space::local_space);
-        int nreduce = nwarps / WARP_SIZE;
+        size_t nreduce = nwarps / WARP_SIZE;
          tmp = 0.f;
          for (size_t i = 0; i < nreduce; i += 1)
          {
@@ -352,6 +352,7 @@ void ggml_sycl_op_group_norm(ggml_backend_sycl_context& ctx, const ggml_tensor*
      (void)src1;
      (void)dst;
      (void)src1_dd;
+    GGML_UNUSED(ctx);
  }
  
  void ggml_sycl_op_rms_norm(ggml_backend_sycl_context& ctx, const ggml_tensor* src0,
diff --git a/ggml/src/ggml-sycl/rope.cpp b/ggml/src/ggml-sycl/rope.cpp

index 1f06f78fa3d9193d3dc16e45a3aea6473d053b1e..1244b231af738836fd48833f62e066a6f8672631 100644 (file)
--- a/ggml/src/ggml-sycl/rope.cpp
+++ b/ggml/src/ggml-sycl/rope.cpp
@@ -269,7 +269,8 @@ void ggml_sycl_op_rope(
          }
      }
  
-    (void) src1;
-    (void) dst;
-    (void) src1_dd;
+    GGML_UNUSED(src1);
+    GGML_UNUSED(dst);
+    GGML_UNUSED(src1_dd);
+    GGML_UNUSED(ctx);
  }
diff --git a/ggml/src/ggml-sycl/softmax.cpp b/ggml/src/ggml-sycl/softmax.cpp

index 17a542e49036278c0e650ba6661ae96af2deef12..a9b3fce0dc430a6e6e8daa148d9bb1fc9ff81074 100644 (file)
--- a/ggml/src/ggml-sycl/softmax.cpp
+++ b/ggml/src/ggml-sycl/softmax.cpp
@@ -16,7 +16,7 @@ static void soft_max_f32(const float * x, const float * mask, float * dst, const
      const int lane_id = item_ct1.get_local_id(2) % WARP_SIZE;
      const int nthreads = block_size;
      const int nwarps = nthreads / WARP_SIZE;
-    int nreduce = nwarps / WARP_SIZE;
+    size_t nreduce = nwarps / WARP_SIZE;
      float slope = 1.0f;
  
      // ALiBi
@@ -53,8 +53,9 @@ static void soft_max_f32(const float * x, const float * mask, float * dst, const
      if (block_size > WARP_SIZE) {
          if (warp_id == 0) {
              buf[lane_id] = -INFINITY;
-            for (size_t i = 1; i < nreduce; i += 1)
+            for (size_t i = 1; i < nreduce; i += 1) {
                  buf[lane_id + i * WARP_SIZE] = -INFINITY;
+            }
          }
          item_ct1.barrier(sycl::access::fence_space::local_space);
  
@@ -63,8 +64,7 @@ static void soft_max_f32(const float * x, const float * mask, float * dst, const
          }
          item_ct1.barrier(sycl::access::fence_space::local_space);
          max_val = buf[lane_id];
-        for (size_t i = 1; i < nreduce; i += 1)
-        {
+        for (size_t i = 1; i < nreduce; i += 1) {
              max_val = std::max(max_val, buf[lane_id + i * WARP_SIZE]);
          }
          max_val = warp_reduce_max(max_val, item_ct1);
@@ -89,8 +89,9 @@ static void soft_max_f32(const float * x, const float * mask, float * dst, const
          item_ct1.barrier(sycl::access::fence_space::local_space);
          if (warp_id == 0) {
              buf[lane_id] = 0.f;
-            for (size_t i = 1; i < nreduce; i += 1)
+            for (size_t i = 1; i < nreduce; i += 1) {
                  buf[lane_id + i * WARP_SIZE] = 0.f;
+            }
          }
          item_ct1.barrier(sycl::access::fence_space::local_space);
  
@@ -100,8 +101,7 @@ static void soft_max_f32(const float * x, const float * mask, float * dst, const
          item_ct1.barrier(sycl::access::fence_space::local_space);
  
          tmp = buf[lane_id];
-        for (size_t i = 1; i < nreduce; i += 1)
-        {
+        for (size_t i = 1; i < nreduce; i += 1) {
              tmp += buf[lane_id + i * WARP_SIZE];
          }
          tmp = warp_reduce_sum(tmp, item_ct1);
diff --git a/ggml/src/ggml-sycl/tsembd.cpp b/ggml/src/ggml-sycl/tsembd.cpp

index d5c227cd1abcd333e46229178654524e654ab421..2ffe3cca917255499dac625a78e489bc54e8fdc5 100644 (file)
--- a/ggml/src/ggml-sycl/tsembd.cpp
+++ b/ggml/src/ggml-sycl/tsembd.cpp
@@ -68,4 +68,5 @@ void ggml_sycl_op_timestep_embedding(ggml_backend_sycl_context & ctx, const ggml
      const int max_period = dst->op_params[1];
  
      timestep_embedding_f32_sycl(src0_d, dst_d, src0->ne[0], dst->nb[1], dim, max_period, stream);
+    GGML_UNUSED(src1);
  }
diff --git a/ggml/src/ggml-sycl/wkv6.cpp b/ggml/src/ggml-sycl/wkv6.cpp

index 4c737f4bfce2fcd34d0e7e24e31e9e4d73b5fbe4..75ddfb86ac0f7ee9b093ad13ebf7d7ef5edf3730 100644 (file)
--- a/ggml/src/ggml-sycl/wkv6.cpp
+++ b/ggml/src/ggml-sycl/wkv6.cpp
@@ -59,7 +59,7 @@ static void rwkv_wkv_f32_kernel(
          float y = 0;
  
          // Process in chunks of 4 for better vectorization
-        sycl::float4 k4, r4, tf4, td4, s4, kv4;
+        sycl::float4 k4, r4, tf4, td4, s4;
          #pragma unroll
          for (int j = 0; j < head_size; j += 4) {
              // Load data in vec4 chunks
@@ -135,4 +135,7 @@ void ggml_sycl_op_rwkv_wkv6(ggml_backend_sycl_context& ctx, const ggml_tensor* s
                  );
              });
      });
+
+    GGML_UNUSED(src0);
+    GGML_UNUSED(src1);
  }
author	Akarshan Biswas <redacted>
	Fri, 13 Dec 2024 06:42:15 +0000 (12:12 +0530)
committer	Georgi Gerganov <redacted>
	Wed, 18 Dec 2024 10:52:16 +0000 (12:52 +0200)
ggml/src/ggml-sycl/common.cpp		patch \| blob \| history
ggml/src/ggml-sycl/common.hpp		patch \| blob \| history
ggml/src/ggml-sycl/concat.cpp		patch \| blob \| history
ggml/src/ggml-sycl/convert.cpp		patch \| blob \| history
ggml/src/ggml-sycl/dmmv.cpp		patch \| blob \| history
ggml/src/ggml-sycl/dpct/helper.hpp		patch \| blob \| history
ggml/src/ggml-sycl/element_wise.cpp		patch \| blob \| history
ggml/src/ggml-sycl/gemm.hpp		patch \| blob \| history
ggml/src/ggml-sycl/ggml-sycl.cpp		patch \| blob \| history
ggml/src/ggml-sycl/im2col.cpp		patch \| blob \| history
ggml/src/ggml-sycl/mmq.cpp		patch \| blob \| history
ggml/src/ggml-sycl/mmvq.cpp		patch \| blob \| history
ggml/src/ggml-sycl/norm.cpp		patch \| blob \| history
ggml/src/ggml-sycl/rope.cpp		patch \| blob \| history
ggml/src/ggml-sycl/softmax.cpp		patch \| blob \| history
ggml/src/ggml-sycl/tsembd.cpp		patch \| blob \| history
ggml/src/ggml-sycl/wkv6.cpp		patch \| blob \| history