Tidy ggml-sycl (#5261)
author AidanBeltonS <redacted>
Fri, 2 Feb 2024 08:39:48 +0000 (08:39 +0000)
committer GitHub <redacted>
Fri, 2 Feb 2024 08:39:48 +0000 (16:39 +0800)
* Tidy some code in ggml-sycl

* Remove blank space

* Remove std::printf comments

---------

Co-authored-by: Abhilash Majumder <redacted>
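
Most of the tidy-up in this commit marks intentionally unused parameters with GGML_UNUSED so that stubbed-out code paths (the non-MKL gemm stub, the disabled VMM pool, the USM memcpy branch) compile cleanly with unused-parameter warnings enabled. As a minimal sketch of that pattern — assuming GGML_UNUSED expands to a cast-to-void, which is the conventional form; see ggml.h for the exact definition, and example_pool_malloc is a hypothetical stand-in, not a function from the diff:

    // Minimal sketch: silence -Wunused-parameter in a stubbed-out code path.
    #include <cstddef>

    #ifndef GGML_UNUSED
    #define GGML_UNUSED(x) (void)(x)   // assumed definition; ggml.h may differ
    #endif

    // Every argument is "used" via the macro before the stub returns,
    // so builds with -Werror stay clean even though the body does nothing.
    static void * example_pool_malloc(size_t size, size_t * actual_size) {
        GGML_UNUSED(size);
        GGML_UNUSED(actual_size);
        return nullptr; // feature not supported in this build
    }

The cast-to-void has no runtime effect; it only tells the compiler the parameter is deliberately ignored, which is why the commit can drop variables and printf debugging without introducing new warnings.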
ggml-sycl.cpp

index 4ee2eed387bc03b5dd2a58e251532cd94728d88b..ac75f8e16f63e1cc13ac5dbda98f766da93ebf9b 100644 (file)
@@ -1366,6 +1366,7 @@ namespace dpct
             }
 #else
             return q.memcpy(to_ptr, from_ptr, size, dep_events);
+            GGML_UNUSED(direction);
 #endif // DPCT_USM_LEVEL_NONE
         }
 
@@ -1667,7 +1668,7 @@ namespace dpct
             using Ty = typename DataType<T>::T2;
             Ty s_h;
             if (get_pointer_attribute(q, s) == pointer_access_attribute::device_only)
-                detail::dpct_memcpy(q, (void *)&s_h, (void *)s, sizeof(T), device_to_host)
+                detail::dpct_memcpy(q, (void *)&s_h, (const void *)s, sizeof(T), device_to_host)
                     .wait();
             else
                 s_h = *reinterpret_cast<const Ty *>(s);
@@ -1691,6 +1692,20 @@ namespace dpct
                               int ldb, const void *beta, void *c, int ldc)
         {
 #ifndef __INTEL_MKL__
+            GGML_UNUSED(q);
+            GGML_UNUSED(a_trans);
+            GGML_UNUSED(b_trans);
+            GGML_UNUSED(m);
+            GGML_UNUSED(n);
+            GGML_UNUSED(k);
+            GGML_UNUSED(alpha);
+            GGML_UNUSED(a);
+            GGML_UNUSED(lda);
+            GGML_UNUSED(b);
+            GGML_UNUSED(ldb);
+            GGML_UNUSED(beta);
+            GGML_UNUSED(c);
+            GGML_UNUSED(ldc);
             throw std::runtime_error("The oneAPI Math Kernel Library (oneMKL) Interfaces "
                                      "Project does not support this API.");
 #else
@@ -1830,7 +1845,7 @@ namespace dpct
 
     template <typename T>
     T permute_sub_group_by_xor(sycl::sub_group g, T x, unsigned int mask,
-                               int logical_sub_group_size = 32)
+                               unsigned int logical_sub_group_size = 32)
     {
         unsigned int id = g.get_local_linear_id();
         unsigned int start_index =
@@ -2160,6 +2175,7 @@ namespace dpct
         }
 #else
         return q.memcpy(to_ptr, from_ptr, size, dep_events);
+        GGML_UNUSED(direction);
 #endif // DPCT_USM_LEVEL_NONE
     }
 
@@ -3302,7 +3318,7 @@ void log_ggml_var_device(const char*name, float *src, size_t total_elements, boo
     std::ofstream logfile;
     logfile.open(filename);
     // printf("local buf element %d\n", total_elements);
-    for(int i=0; i<total_elements; i++){
+    for(size_t i=0; i<total_elements; i++){
         if((i+1)%20 ==0) logfile <<std::endl;
         else logfile << local_buf[i] <<" ";
     }
@@ -3396,6 +3412,7 @@ static __dpct_inline__ float warp_reduce_max(float x,
 
 static __dpct_inline__ float op_repeat(const float a, const float b) {
     return b;
+    GGML_UNUSED(a);
 }
 
 static __dpct_inline__ float op_add(const float a, const float b) {
@@ -11156,10 +11173,10 @@ DPCT1082:64: Migration of CUmemGenericAllocationHandle type is not supported.
 //     g_sycl_pool_handles[GGML_SYCL_MAX_DEVICES];
 static dpct::device_ptr g_sycl_pool_addr[GGML_SYCL_MAX_DEVICES] = {0};
 static size_t g_sycl_pool_used[GGML_SYCL_MAX_DEVICES] = {0};
-static const size_t SYCL_POOL_VMM_MAX_SIZE = 1ull << 36; // 64 GB
 
 static void *ggml_sycl_pool_malloc_vmm(size_t size, size_t *actual_size) try {
-
+    GGML_UNUSED(size);
+    GGML_UNUSED(actual_size);
     return NULL;
 }
 catch (sycl::exception const &exc) {
@@ -11349,9 +11366,8 @@ void ggml_init_sycl() try {
             if(id!=user_device_id) continue;
 
             device_inx++;
-            int device_vmm = 0;
 
-            g_device_caps[device_inx].vmm = !!device_vmm;
+            g_device_caps[device_inx].vmm = 0;
             g_device_caps[device_inx].device_id = id;
             g_sycl_device_id2index[id].index = device_inx;
 
@@ -11359,18 +11375,12 @@ void ggml_init_sycl() try {
             SYCL_CHECK(CHECK_TRY_ERROR(dpct::get_device_info(
                 prop, dpct::dev_mgr::instance().get_device(id))));
 
-            // fprintf(stderr,
-            //         "  Device %d: %s, compute capability %d.%d, VMM: %s\n", id,
-            //         prop.get_name(), prop.get_major_version(),
-            //         prop.get_minor_version(), device_vmm ? "yes" : "no");
-
             g_tensor_split[device_inx] = total_vram;
             total_vram += prop.get_global_mem_size();
 
             g_device_caps[device_inx].cc =
                 100 * prop.get_major_version() + 10 * prop.get_minor_version();
 
-            // printf("g_device_caps[%d].cc=%d\n", device_inx, g_device_caps[device_inx].cc);
         }
         device_inx = -1;
         for (int id = 0; id < g_all_sycl_device_count; ++id) {
@@ -12206,7 +12216,6 @@ inline void ggml_sycl_op_mul_mat_sycl(
     // ldc == nrows of the matrix that cuBLAS writes into
     int ldc = dst->backend == GGML_BACKEND_GPU && device_id == g_main_device ? ne0 : row_diff;
 
-    const int compute_capability = g_device_caps[id].cc;
 #ifdef GGML_SYCL_F16
     bool use_fp16 = true;  // TODO(Yu) SYCL capability check
 #else
@@ -12691,7 +12700,7 @@ static void ggml_sycl_set_peer_access(const int n_tokens) {
                 continue;
             }
 
-            int can_access_peer;
+            // int can_access_peer;
             // SYCL_CHECK(syclDeviceCanAccessPeer(&can_access_peer, id, id_other));
             // if (can_access_peer) {
             //     if (enable_peer_access) {
@@ -12716,7 +12725,6 @@ static void ggml_sycl_op_mul_mat(const ggml_tensor *src0,
     const int64_t ne01 = src0->ne[1];
     const int64_t ne02 = src0->ne[2];
     const int64_t ne03 = src0->ne[3];
-    const int64_t nrows0 = ggml_nrows(src0);
 
     const int64_t ne10 = src1->ne[0];
     const int64_t ne11 = src1->ne[1];
@@ -13812,13 +13820,6 @@ static void ggml_sycl_mul_mat_id(const ggml_tensor *src0,
         src1_row_extra.data_device[g_main_device_index] = src1_contiguous.get();
         dst_row_extra.data_device[g_main_device_index]  =  dst_contiguous.get();
 
-        const dpct::memcpy_direction src1_kind =
-            src1->backend == GGML_BACKEND_CPU ? dpct::host_to_device
-                                              : dpct::device_to_device;
-        const dpct::memcpy_direction dst_kind = dst->backend == GGML_BACKEND_CPU
-                                                    ? dpct::device_to_host
-                                                    : dpct::device_to_device;
-
         for (int32_t row_id = 0; row_id < n_as; ++row_id) {
             const struct ggml_tensor * src0_row = dst->src[row_id + 2];