Tidy ggml-sycl (#5261)
author AidanBeltonS <redacted>
Fri, 2 Feb 2024 08:39:48 +0000 (08:39 +0000)
committer GitHub <redacted>
Fri, 2 Feb 2024 08:39:48 +0000 (16:39 +0800)
* Tidy some code in ggml-sycl

* Remove blank space

* Remove std::printf comments

---------

Co-authored-by: Abhilash Majumder <redacted>
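
Most of the tidy-up in this commit marks intentionally unused parameters with GGML_UNUSED so that stubbed-out code paths (the non-MKL gemm stub, the disabled VMM pool, the USM memcpy branch) compile cleanly with unused-parameter warnings enabled. As a minimal sketch of that pattern — assuming GGML_UNUSED expands to a cast-to-void, which is the conventional form; see ggml.h for the exact definition, and example_pool_malloc is a hypothetical stand-in, not a function from the diff:

    // Minimal sketch: silence -Wunused-parameter in a stubbed-out code path.
    #include <cstddef>

    #ifndef GGML_UNUSED
    #define GGML_UNUSED(x) (void)(x)   // assumed definition; ggml.h may differ
    #endif

    // Every argument is "used" via the macro before the stub returns,
    // so builds with -Werror stay clean even though the body does nothing.
    static void * example_pool_malloc(size_t size, size_t * actual_size) {
        GGML_UNUSED(size);
        GGML_UNUSED(actual_size);
        return nullptr; // feature not supported in this build
    }

The cast-to-void has no runtime effect; it only tells the compiler the parameter is deliberately ignored, which is why the commit can drop variables and printf debugging without introducing new warnings.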
ggml-sycl.cpp

index 4ee2eed387bc03b5dd2a58e251532cd94728d88b..ac75f8e16f63e1cc13ac5dbda98f766da93ebf9b 100644 (file)
@@ -1366,6 +1366,7 @@ namespace dpct
             }
 #else
             return q.memcpy(to_ptr, from_ptr, size, dep_events);
+            GGML_UNUSED(direction);
 #endif // DPCT_USM_LEVEL_NONE
         }
 
@@ -1667,7 +1668,7 @@ namespace dpct
             using Ty = typename DataType<T>::T2;
             Ty s_h;
             if (get_pointer_attribute(q, s) == pointer_access_attribute::device_only)
-                detail::dpct_memcpy(q, (void *)&s_h, (void *)s, sizeof(T), device_to_host)
+                detail::dpct_memcpy(q, (void *)&s_h, (const void *)s, sizeof(T), device_to_host)
                     .wait();
             else
                 s_h = *reinterpret_cast<const Ty *>(s);
@@ -1691,6 +1692,20 @@ namespace dpct
                               int ldb, const void *beta, void *c, int ldc)
         {
 #ifndef __INTEL_MKL__
+            GGML_UNUSED(q);
+            GGML_UNUSED(a_trans);
+            GGML_UNUSED(b_trans);
+            GGML_UNUSED(m);
+            GGML_UNUSED(n);
+            GGML_UNUSED(k);
+            GGML_UNUSED(alpha);
+            GGML_UNUSED(a);
+            GGML_UNUSED(lda);
+            GGML_UNUSED(b);
+            GGML_UNUSED(ldb);
+            GGML_UNUSED(beta);
+            GGML_UNUSED(c);
+            GGML_UNUSED(ldc);
             throw std::runtime_error("The oneAPI Math Kernel Library (oneMKL) Interfaces "
                                      "Project does not support this API.");
 #else
@@ -1830,7 +1845,7 @@ namespace dpct
 
     template <typename T>
     T permute_sub_group_by_xor(sycl::sub_group g, T x, unsigned int mask,
-                               int logical_sub_group_size = 32)
+                               unsigned int logical_sub_group_size = 32)
     {
         unsigned int id = g.get_local_linear_id();
         unsigned int start_index =
@@ -2160,6 +2175,7 @@ namespace dpct
         }
 #else
         return q.memcpy(to_ptr, from_ptr, size, dep_events);
+        GGML_UNUSED(direction);
 #endif // DPCT_USM_LEVEL_NONE
     }
 
@@ -3302,7 +3318,7 @@ void log_ggml_var_device(const char*name, float *src, size_t total_elements, boo
     std::ofstream logfile;
     logfile.open(filename);
     // printf("local buf element %d\n", total_elements);
-    for(int i=0; i<total_elements; i++){
+    for(size_t i=0; i<total_elements; i++){
         if((i+1)%20 ==0) logfile <<std::endl;
         else logfile << local_buf[i] <<" ";
     }
@@ -3396,6 +3412,7 @@ static __dpct_inline__ float warp_reduce_max(float x,
 
 static __dpct_inline__ float op_repeat(const float a, const float b) {
     return b;
+    GGML_UNUSED(a);
 }
 
 static __dpct_inline__ float op_add(const float a, const float b) {
@@ -11156,10 +11173,10 @@ DPCT1082:64: Migration of CUmemGenericAllocationHandle type is not supported.
 //     g_sycl_pool_handles[GGML_SYCL_MAX_DEVICES];
 static dpct::device_ptr g_sycl_pool_addr[GGML_SYCL_MAX_DEVICES] = {0};
 static size_t g_sycl_pool_used[GGML_SYCL_MAX_DEVICES] = {0};
-static const size_t SYCL_POOL_VMM_MAX_SIZE = 1ull << 36; // 64 GB
 
 static void *ggml_sycl_pool_malloc_vmm(size_t size, size_t *actual_size) try {
-
+    GGML_UNUSED(size);
+    GGML_UNUSED(actual_size);
     return NULL;
 }
 catch (sycl::exception const &exc) {
@@ -11349,9 +11366,8 @@ void ggml_init_sycl() try {
             if(id!=user_device_id) continue;
 
             device_inx++;
-            int device_vmm = 0;
 
-            g_device_caps[device_inx].vmm = !!device_vmm;
+            g_device_caps[device_inx].vmm = 0;
             g_device_caps[device_inx].device_id = id;
             g_sycl_device_id2index[id].index = device_inx;
 
@@ -11359,18 +11375,12 @@ void ggml_init_sycl() try {
             SYCL_CHECK(CHECK_TRY_ERROR(dpct::get_device_info(
                 prop, dpct::dev_mgr::instance().get_device(id))));
 
-            // fprintf(stderr,
-            //         "  Device %d: %s, compute capability %d.%d, VMM: %s\n", id,
-            //         prop.get_name(), prop.get_major_version(),
-            //         prop.get_minor_version(), device_vmm ? "yes" : "no");
-
             g_tensor_split[device_inx] = total_vram;
             total_vram += prop.get_global_mem_size();
 
             g_device_caps[device_inx].cc =
                 100 * prop.get_major_version() + 10 * prop.get_minor_version();
 
-            // printf("g_device_caps[%d].cc=%d\n", device_inx, g_device_caps[device_inx].cc);
         }
         device_inx = -1;
         for (int id = 0; id < g_all_sycl_device_count; ++id) {
@@ -12206,7 +12216,6 @@ inline void ggml_sycl_op_mul_mat_sycl(
     // ldc == nrows of the matrix that cuBLAS writes into
     int ldc = dst->backend == GGML_BACKEND_GPU && device_id == g_main_device ? ne0 : row_diff;
 
-    const int compute_capability = g_device_caps[id].cc;
 #ifdef GGML_SYCL_F16
     bool use_fp16 = true;  // TODO(Yu) SYCL capability check
 #else
@@ -12691,7 +12700,7 @@ static void ggml_sycl_set_peer_access(const int n_tokens) {
                 continue;
             }
 
-            int can_access_peer;
+            // int can_access_peer;
             // SYCL_CHECK(syclDeviceCanAccessPeer(&can_access_peer, id, id_other));
             // if (can_access_peer) {
             //     if (enable_peer_access) {
@@ -12716,7 +12725,6 @@ static void ggml_sycl_op_mul_mat(const ggml_tensor *src0,
     const int64_t ne01 = src0->ne[1];
     const int64_t ne02 = src0->ne[2];
     const int64_t ne03 = src0->ne[3];
-    const int64_t nrows0 = ggml_nrows(src0);
 
     const int64_t ne10 = src1->ne[0];
     const int64_t ne11 = src1->ne[1];
@@ -13812,13 +13820,6 @@ static void ggml_sycl_mul_mat_id(const ggml_tensor *src0,
         src1_row_extra.data_device[g_main_device_index] = src1_contiguous.get();
         dst_row_extra.data_device[g_main_device_index]  =  dst_contiguous.get();
 
-        const dpct::memcpy_direction src1_kind =
-            src1->backend == GGML_BACKEND_CPU ? dpct::host_to_device
-                                              : dpct::device_to_device;
-        const dpct::memcpy_direction dst_kind = dst->backend == GGML_BACKEND_CPU
-                                                    ? dpct::device_to_host
-                                                    : dpct::device_to_device;
-
         for (int32_t row_id = 0; row_id < n_as; ++row_id) {
             const struct ggml_tensor * src0_row = dst->src[row_id + 2];