}
#else
return q.memcpy(to_ptr, from_ptr, size, dep_events);
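+ // With USM enabled, q.memcpy works for any direction, so the parameter is otherwise unused.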
+ GGML_UNUSED(direction);
#endif // DPCT_USM_LEVEL_NONE
}
using Ty = typename DataType<T>::T2;
Ty s_h;
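+ // A device-only scalar cannot be dereferenced on the host, so stage it through s_h.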
if (get_pointer_attribute(q, s) == pointer_access_attribute::device_only)
- detail::dpct_memcpy(q, (void *)&s_h, (void *)s, sizeof(T), device_to_host)
+ detail::dpct_memcpy(q, (void *)&s_h, (const void *)s, sizeof(T), device_to_host)
.wait();
else
s_h = *reinterpret_cast<const Ty *>(s);
int ldb, const void *beta, void *c, int ldc)
{
#ifndef __INTEL_MKL__
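+ // Without Intel oneMKL this overload only throws, so every parameter would otherwise trigger an unused-parameter warning.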
+ GGML_UNUSED(q);
+ GGML_UNUSED(a_trans);
+ GGML_UNUSED(b_trans);
+ GGML_UNUSED(m);
+ GGML_UNUSED(n);
+ GGML_UNUSED(k);
+ GGML_UNUSED(alpha);
+ GGML_UNUSED(a);
+ GGML_UNUSED(lda);
+ GGML_UNUSED(b);
+ GGML_UNUSED(ldb);
+ GGML_UNUSED(beta);
+ GGML_UNUSED(c);
+ GGML_UNUSED(ldc);
throw std::runtime_error("The oneAPI Math Kernel Library (oneMKL) Interfaces "
"Project does not support this API.");
#else
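+ // XOR-based shuffle: each work-item reads x from the lane whose id within the logical sub-group differs by mask.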
template <typename T>
T permute_sub_group_by_xor(sycl::sub_group g, T x, unsigned int mask,
- int logical_sub_group_size = 32)
+ unsigned int logical_sub_group_size = 32)
{
unsigned int id = g.get_local_linear_id();
unsigned int start_index =
std::ofstream logfile;
logfile.open(filename);
// printf("local buf element %d\n", total_elements);
- for(int i=0; i<total_elements; i++){
+ for(size_t i=0; i<total_elements; i++){
- if((i+1)%20 ==0) logfile <<std::endl;
- else logfile << local_buf[i] <<" ";
+ logfile << local_buf[i] << " ";
+ if ((i + 1) % 20 == 0) logfile << std::endl;
}
static __dpct_inline__ float op_repeat(const float a, const float b) {
return b;
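+ // Unreachable, but referencing a here suppresses the unused-parameter warning.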
+ GGML_UNUSED(a);
}
static __dpct_inline__ float op_add(const float a, const float b) {
// g_sycl_pool_handles[GGML_SYCL_MAX_DEVICES];
static dpct::device_ptr g_sycl_pool_addr[GGML_SYCL_MAX_DEVICES] = {0};
static size_t g_sycl_pool_used[GGML_SYCL_MAX_DEVICES] = {0};
-static const size_t SYCL_POOL_VMM_MAX_SIZE = 1ull << 36; // 64 GB
static void *ggml_sycl_pool_malloc_vmm(size_t size, size_t *actual_size) try {
-
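+ // VMM pooling is not implemented for SYCL; report no allocation.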
+ GGML_UNUSED(size);
+ GGML_UNUSED(actual_size);
return NULL;
}
catch (sycl::exception const &exc) {
if(id!=user_device_id) continue;
device_inx++;
- int device_vmm = 0;
- g_device_caps[device_inx].vmm = !!device_vmm;
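+ // Virtual memory management is not supported by the SYCL backend.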
+ g_device_caps[device_inx].vmm = 0;
g_device_caps[device_inx].device_id = id;
g_sycl_device_id2index[id].index = device_inx;
SYCL_CHECK(CHECK_TRY_ERROR(dpct::get_device_info(
prop, dpct::dev_mgr::instance().get_device(id))));
- // fprintf(stderr,
- // " Device %d: %s, compute capability %d.%d, VMM: %s\n", id,
- // prop.get_name(), prop.get_major_version(),
- // prop.get_minor_version(), device_vmm ? "yes" : "no");
-
g_tensor_split[device_inx] = total_vram;
total_vram += prop.get_global_mem_size();
g_device_caps[device_inx].cc =
100 * prop.get_major_version() + 10 * prop.get_minor_version();
- // printf("g_device_caps[%d].cc=%d\n", device_inx, g_device_caps[device_inx].cc);
}
device_inx = -1;
for (int id = 0; id < g_all_sycl_device_count; ++id) {
- // ldc == nrows of the matrix that cuBLAS writes into
+ // ldc == nrows of the matrix that the oneMKL GEMM writes into
int ldc = dst->backend == GGML_BACKEND_GPU && device_id == g_main_device ? ne0 : row_diff;
- const int compute_capability = g_device_caps[id].cc;
#ifdef GGML_SYCL_F16
bool use_fp16 = true; // TODO(Yu) SYCL capability check
#else
continue;
}
- int can_access_peer;
+ // int can_access_peer;
// SYCL_CHECK(syclDeviceCanAccessPeer(&can_access_peer, id, id_other));
// if (can_access_peer) {
// if (enable_peer_access) {
const int64_t ne01 = src0->ne[1];
const int64_t ne02 = src0->ne[2];
const int64_t ne03 = src0->ne[3];
- const int64_t nrows0 = ggml_nrows(src0);
const int64_t ne10 = src1->ne[0];
const int64_t ne11 = src1->ne[1];
src1_row_extra.data_device[g_main_device_index] = src1_contiguous.get();
dst_row_extra.data_device[g_main_device_index] = dst_contiguous.get();
- const dpct::memcpy_direction src1_kind =
- src1->backend == GGML_BACKEND_CPU ? dpct::host_to_device
- : dpct::device_to_device;
- const dpct::memcpy_direction dst_kind = dst->backend == GGML_BACKEND_CPU
- ? dpct::device_to_host
- : dpct::device_to_device;
-
for (int32_t row_id = 0; row_id < n_as; ++row_id) {
const struct ggml_tensor * src0_row = dst->src[row_id + 2];