//
#include "common.hpp"
+#include "ggml-impl.h"
int get_current_device_id() {
return dpct::dev_mgr::instance().current_device_id();
if (err != 0) {
// clear the error
- fprintf(
- stderr,
- "WARNING: failed to allocate %.2f MB of pinned memory: %s\n",
- size / 1024.0 / 1024.0,
- "syclGetErrorString is not supported");
+ GGML_LOG_ERROR("WARNING: failed to allocate %.2f MB of pinned memory: %s\n", size / 1024.0 / 1024.0, "syclGetErrorString is not supported");
return nullptr;
}
void ggml_sycl_op_flatten(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
const ggml_tensor *src1, ggml_tensor *dst,
const ggml_sycl_op_flatten_t op) try {
- const int64_t nrows0 = ggml_nrows(src0);
const bool use_src1 = src1 != nullptr;
- const int64_t nrows1 = use_src1 ? ggml_nrows(src1) : 1;
GGML_ASSERT(!use_src1 || src1->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
GGML_ASSERT( dst->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
- ggml_tensor_extra_gpu * src0_extra = (ggml_tensor_extra_gpu *) src0->extra;
- ggml_tensor_extra_gpu * src1_extra = use_src1 ? (ggml_tensor_extra_gpu *) src1->extra : nullptr;
- ggml_tensor_extra_gpu * dst_extra = (ggml_tensor_extra_gpu *) dst->extra;
-
// dd = data device
float * src0_ddf = (float *) src0->data;
float * src1_ddf = use_src1 ? (float *) src1->data : nullptr;
});
}
}
+ GGML_UNUSED(ctx);
}
};
// operation
int offset_dst = nidx + item_ct1.get_group(1) * ne0 +
item_ct1.get_group(0) * ne0 * item_ct1.get_group_range(1);
- if (item_ct1.get_group(1) < ne01) { // src0
+ if (item_ct1.get_group(1) < (size_t) ne01) { // src0
int offset_src =
nidx + item_ct1.get_group(1) * ne0 + item_ct1.get_group(0) * ne0 * ne01;
dst[offset_dst] = x[offset_src];
// operation
int offset_dst = nidx + item_ct1.get_group(1) * ne0 +
item_ct1.get_group(0) * ne0 * item_ct1.get_group_range(1);
- if (item_ct1.get_group(0) < ne02) { // src0
+ if (item_ct1.get_group(0) < (size_t) ne02) { // src0
int offset_src = nidx + item_ct1.get_group(1) * ne0 +
item_ct1.get_group(0) * ne0 * item_ct1.get_group_range(1);
dst[offset_dst] = x[offset_src];
const int64_t global_id = item_ct1.get_local_id(2) + work_group_size * item_ct1.get_group(2);
// make each work-item deal with more elements since sycl global range can not exceed max int
- const src_t * x = (src_t *) vx;
+ const src_t * x = (const src_t *) vx;
for (int64_t i = global_id; i < k; i += work_group_size * item_ct1.get_group_range(2)) {
y[i] = x[i];
}
break;
}
- (void) src1;
- (void) dst;
- (void) src1_ddq_i;
- (void) src1_ncols;
- (void) src1_padded_row_size;
+ GGML_UNUSED(src1);
+ GGML_UNUSED(dst);
+ GGML_UNUSED(src1_ddq_i);
+ GGML_UNUSED(src1_ncols);
+ GGML_UNUSED(src1_padded_row_size);
}
std::map<byte_t *, allocation>::iterator get_map_iterator(const void *ptr)
{
- auto it = m_map.upper_bound((byte_t *)ptr);
+ auto it = m_map.upper_bound(const_cast<byte_t *>(reinterpret_cast<const byte_t *>(ptr)));
if (it == m_map.end())
{
// Not a virtual pointer.
int i02 = i12 / sf2;
int i03 = i13 / sf3;
- dst[index] = *(float *)((char *)x + i03 * nb03 + i02 * nb02 + i01 * nb01 + i00 * nb00);
+ dst[index] = *(const float *)((const char *)x + i03 * nb03 + i02 * nb02 + i01 * nb01 + i00 * nb00);
}
void pad_f32(const float *x, float *dst, const int ne0, const int ne00, const int ne01, const int ne02,
// operation
int offset_dst = nidx + item_ct1.get_group(1) * ne0 +
item_ct1.get_group(0) * ne0 * item_ct1.get_group_range(1);
- if (nidx < ne00 && item_ct1.get_group(1) < ne01 &&
- item_ct1.get_group(0) < ne02) {
+ if (nidx < ne00 && item_ct1.get_group(1) < (size_t) ne01 && item_ct1.get_group(0) < (size_t) ne02) {
int offset_src = nidx + item_ct1.get_group(1) * ne00 +
item_ct1.get_group(0) * ne00 * ne01;
dst[offset_dst] = x[offset_src];
silu_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
- (void) src1;
- (void) dst;
- (void) src1_dd;
+ GGML_UNUSED(src1);
+ GGML_UNUSED(dst);
+ GGML_UNUSED(src1_dd);
+ GGML_UNUSED(ctx);
}
inline void ggml_sycl_op_gelu(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
gelu_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
- (void) src1;
- (void) dst;
- (void) src1_dd;
+ GGML_UNUSED(src1);
+ GGML_UNUSED(dst);
+ GGML_UNUSED(src1_dd);
+ GGML_UNUSED(ctx);
}
inline void ggml_sycl_op_gelu_quick(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
const ggml_tensor *src1, ggml_tensor *dst,
gelu_quick_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
- (void) src1;
- (void) dst;
- (void) src1_dd;
+ GGML_UNUSED(src1);
+ GGML_UNUSED(dst);
+ GGML_UNUSED(src1_dd);
+ GGML_UNUSED(ctx);
}
inline void ggml_sycl_op_tanh(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
GGML_ASSERT( dst->type == GGML_TYPE_F32);
tanh_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
- (void) src1;
- (void) dst;
- (void) src1_dd;
+ GGML_UNUSED(src1);
+ GGML_UNUSED(dst);
+ GGML_UNUSED(src1_dd);
+ GGML_UNUSED(ctx);
}
inline void ggml_sycl_op_relu(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
relu_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
- (void) src1;
- (void) dst;
- (void) src1_dd;
+ GGML_UNUSED(src1);
+ GGML_UNUSED(dst);
+ GGML_UNUSED(src1_dd);
+ GGML_UNUSED(ctx);
}
inline void ggml_sycl_op_hardsigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
hardsigmoid_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
- (void) src1;
- (void) dst;
- (void) src1_dd;
+ GGML_UNUSED(src1);
+ GGML_UNUSED(dst);
+ GGML_UNUSED(src1_dd);
+ GGML_UNUSED(ctx);
}
inline void ggml_sycl_op_hardswish(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
hardswish_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
- (void) src1;
- (void) dst;
- (void) src1_dd;
+ GGML_UNUSED(src1);
+ GGML_UNUSED(dst);
+ GGML_UNUSED(src1_dd);
+ GGML_UNUSED(ctx);
}
inline void ggml_sycl_op_exp(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
exp_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
- (void) src1;
- (void) dst;
- (void) src1_dd;
+ GGML_UNUSED(src1);
+ GGML_UNUSED(dst);
+ GGML_UNUSED(src1_dd);
+ GGML_UNUSED(ctx);
}
inline void ggml_sycl_op_log(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
log_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
- (void) src1;
- (void) dst;
- (void) src1_dd;
+ GGML_UNUSED(src1);
+ GGML_UNUSED(dst);
+ GGML_UNUSED(src1_dd);
+ GGML_UNUSED(ctx);
}
inline void ggml_sycl_op_sigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
sigmoid_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
- (void) src1;
- (void) dst;
- (void) src1_dd;
+ GGML_UNUSED(src1);
+ GGML_UNUSED(dst);
+ GGML_UNUSED(src1_dd);
+ GGML_UNUSED(ctx);
}
inline void ggml_sycl_op_sqrt(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
sqrt_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
- (void) src1;
- (void) dst;
- (void) src1_dd;
+ GGML_UNUSED(src1);
+ GGML_UNUSED(dst);
+ GGML_UNUSED(src1_dd);
+ GGML_UNUSED(ctx);
}
inline void ggml_sycl_op_sin(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
sin_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
- (void) src1;
- (void) dst;
- (void) src1_dd;
+ GGML_UNUSED(src1);
+ GGML_UNUSED(dst);
+ GGML_UNUSED(src1_dd);
+ GGML_UNUSED(ctx);
}
inline void ggml_sycl_op_cos(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
cos_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
- (void) src1;
- (void) dst;
- (void) src1_dd;
+ GGML_UNUSED(src1);
+ GGML_UNUSED(dst);
+ GGML_UNUSED(src1_dd);
+ GGML_UNUSED(ctx);
}
inline void ggml_sycl_op_step(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
step_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
- (void) src1;
- (void) dst;
- (void) src1_dd;
+ GGML_UNUSED(src1);
+ GGML_UNUSED(dst);
+ GGML_UNUSED(src1_dd);
+ GGML_UNUSED(ctx);
}
inline void ggml_sycl_op_neg(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
neg_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
- (void) src1;
- (void) dst;
- (void) src1_dd;
+ GGML_UNUSED(src1);
+ GGML_UNUSED(dst);
+ GGML_UNUSED(src1_dd);
+ GGML_UNUSED(ctx);
}
inline void ggml_sycl_op_leaky_relu(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
leaky_relu_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), negative_slope, main_stream);
- (void) src1;
- (void) dst;
- (void) src1_dd;
+ GGML_UNUSED(src1);
+ GGML_UNUSED(dst);
+ GGML_UNUSED(src1_dd);
+ GGML_UNUSED(ctx);
}
inline void ggml_sycl_op_sqr(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
sqr_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
- (void) src1;
- (void) dst;
- (void) src1_dd;
+ GGML_UNUSED(src1);
+ GGML_UNUSED(dst);
+ GGML_UNUSED(src1_dd);
+ GGML_UNUSED(ctx);
}
inline void ggml_sycl_op_upscale(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3], sf0, sf1, sf2, sf3,
main_stream);
- (void) src1;
- (void) dst;
- (void) src1_dd;
+ GGML_UNUSED(src1);
+ GGML_UNUSED(dst);
+ GGML_UNUSED(src1_dd);
+ GGML_UNUSED(ctx);
}
inline void ggml_sycl_op_pad(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
src0->ne[0], src0->ne[1], src0->ne[2],
dst->ne[0], dst->ne[1], dst->ne[2], main_stream);
- (void) src1;
- (void) dst;
- (void) src1_dd;
+ GGML_UNUSED(src1);
+ GGML_UNUSED(dst);
+ GGML_UNUSED(src1_dd);
+ GGML_UNUSED(ctx);
}
inline void ggml_sycl_op_acc(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
acc_f32_sycl(src0_dd, src1_dd, dst_dd, ggml_nelements(dst), src1->ne[0], src1->ne[1], src1->ne[2], nb1, nb2, offset, main_stream);
- (void) dst;
+ GGML_UNUSED(dst);
+ GGML_UNUSED(ctx);
}
inline void ggml_sycl_op_add(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
const auto a_in_md = dnnl::memory::desc(a_dims, at, a_trans ? tag::ba : tag::ab);
const auto b_in_md = dnnl::memory::desc(b_dims, bt, b_trans ? tag::ba : tag::ab);
const auto c_md = dnnl::memory::desc(c_dims, ct, tag::ab);
- auto a_mem = dnnl::memory(a_in_md, eng, (void*)a);
- auto b_mem = dnnl::memory(b_in_md, eng, (void*)b);
+ auto a_mem = dnnl::memory(a_in_md, eng, const_cast<void*>(a));
+ auto b_mem = dnnl::memory(b_in_md, eng, const_cast<void*>(b));
auto matmul_pd = dnnl::matmul::primitive_desc(eng, a_in_md, b_in_md, c_md);
auto c_mem = dnnl::memory(matmul_pd.dst_desc(), eng, c);
const auto a_in_md = dnnl::memory::desc(a_dims, at, a_trans ? tag::ba : tag::ab);
const auto b_in_md = dnnl::memory::desc(b_dims, bt, b_trans ? tag::ba : tag::ab);
const auto c_md = dnnl::memory::desc(c_dims, ct, tag::ab);
- auto a_mem = dnnl::memory(a_in_md, eng, (void*)a);
- auto b_mem = dnnl::memory(b_in_md, eng, (void*)b);
+ auto a_mem = dnnl::memory(a_in_md, eng, const_cast<void*>(a));
+ auto b_mem = dnnl::memory(b_in_md, eng, const_cast<void*>(b));
auto matmul_pd = dnnl::matmul::primitive_desc(eng, a_in_md, b_in_md, c_md);
auto c_mem = dnnl::memory(matmul_pd.dst_desc(), eng, c);
info.device_count = dpct::dev_mgr::instance().device_count();
if (info.device_count == 0) {
- GGML_LOG_ERROR("%s: failed to initialize " GGML_SYCL_NAME ": %s\n", __func__);
+ GGML_LOG_ERROR("%s: failed to initialize: %s\n", GGML_SYCL_NAME, __func__);
return info;
}
#else
GGML_LOG_INFO("%s: SYCL_USE_XMX: no\n", __func__);
#endif
- GGML_LOG_INFO("%s: found %d " GGML_SYCL_NAME " devices:\n", __func__, info.device_count);
+ GGML_LOG_INFO("%s: found %d %s devices:\n", __func__, info.device_count, GGML_SYCL_NAME);
for (int i = 0; i < info.device_count; ++i) {
info.devices[i].vmm = 0;
for (int id = 0; id < device_count; ++id) {
sycl::device device = dpct::dev_mgr::instance().get_device(id);
- sycl::backend backend = device.get_backend();
std::string backend_type = get_device_backend_and_type(device);
int type_id = DeviceNums[backend_type]++;
std::stringstream device_type;
return true;
}
return false;
+ GGML_UNUSED(buffer);
+} catch (const sycl::exception & exc) {
+ std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl;
+ std::exit(1);
}
-catch (sycl::exception const &exc) {
- std::cerr << exc.what() << "Exception caught at file:" << __FILE__
- << ", line:" << __LINE__ << std::endl;
- std::exit(1);
-}
-
static void ggml_backend_sycl_buffer_clear(ggml_backend_buffer_t buffer,
uint8_t value) try {
ggml_sycl_buffer buffer_pool[MAX_SYCL_BUFFERS] = {};
size_t pool_size = 0;
- explicit ggml_sycl_pool_leg(queue_ptr qptr_, int device_) :
- qptr(qptr_),
- device(device_) {
- }
+ explicit ggml_sycl_pool_leg(queue_ptr qptr_, int device_) : device(device_), qptr(qptr_) {}
~ggml_sycl_pool_leg() {
for (int i = 0; i < MAX_SYCL_BUFFERS; ++i) {
zeros[i] = 0.f;
qzeros[i] = 0;
}
- const TC xi = ix < kx ? *(TC *)&x[iy * kx + ix] : zeros;
+ const TC xi = ix < kx ? *(const TC *)&x[iy * kx + ix] : zeros;
float sum = xi[0];
float amax = sycl::fabs(xi[0]);
#pragma unroll
switch (op) {
case GGML_OP_POOL_AVG: res = 0; break;
case GGML_OP_POOL_MAX: res = -FLT_MAX; break;
+ default:
+ res = (To) sycl::nan(uint32_t(0));
+ break;
}
for (int i = bh; i < eh; i += 1) {
switch (op) {
case GGML_OP_POOL_AVG: res += (cur / (kh * kw)); break;
case GGML_OP_POOL_MAX: res = sycl::max(res, (To)cur); break;
+ default:
+ res = (To) sycl::nan(uint32_t(0));
+ break;
}
}
}
s3, nb01, nb02, nb03, s10, s11, s12, item_ct1);
});
- (void) dst;
+ GGML_UNUSED(dst);
+ GGML_UNUSED(ctx);
}
template <typename src0_t>
});
}
- (void) dst;
+ GGML_UNUSED(dst);
+ GGML_UNUSED(ctx);
}
-
static void quantize_row_q8_1_sycl(const float *x, void *vy, const int kx,
const int ky, const int kx_padded,
queue_ptr stream) {
ggml_sycl_op_bin_bcast<bin_bcast_sycl<op_repeat>>(ctx, dst, src0, dst, nullptr, src0_d, dst_d, main_stream);
- (void) src1;
- (void) src1_d;
+ GGML_UNUSED(src1);
+ GGML_UNUSED(src1_d);
}
const int64_t ne00 = src0->ne[0];
const int64_t ne10 = src1->ne[0];
- const int64_t ne0 = dst->ne[0];
const int64_t row_diff = row_high - row_low;
int id;
SYCL_CHECK(
CHECK_TRY_ERROR(id = get_current_device_id()));
-
+#if !GGML_SYCL_DNNL
+ const int64_t ne0 = dst->ne[0];
// the main device has a larger memory buffer to hold the results from all GPUs
// ldc == nrows of the matrix that cuBLAS writes into
int ldc = id == ctx.device ? ne0 : row_diff;
+#endif
#ifdef GGML_SYCL_F16
bool use_fp16 = true; // TODO(Yu) SYCL capability check
: src1_as_f16.get();
ggml_sycl_pool_alloc<sycl::half> dst_f16(ctx.pool(), row_diff * src1_ncols);
- const sycl::half alpha_f16 = 1.0f;
- const sycl::half beta_f16 = 0.0f;
#if !GGML_SYCL_DNNL
+ const sycl::half alpha_f16 = 1.0f;
+ const sycl::half beta_f16 = 0.0f;
SYCL_CHECK(CHECK_TRY_ERROR(dpct::gemm(
*stream, oneapi::mkl::transpose::trans,
oneapi::mkl::transpose::nontrans, row_diff, src1_ncols, ne10,
const float * src0_ddf_i = src0->type == GGML_TYPE_F32 ? (const float *) src0_dd_i : src0_ddq_as_f32.get();
const float * src1_ddf1_i = src1->type == GGML_TYPE_F32 ? (const float *) src1_ddf_i : src1_ddq_as_f32.get();
- const float alpha = 1.0f;
- const float beta = 0.0f;
#if !GGML_SYCL_DNNL
+ const float alpha = 1.0f;
+ const float beta = 0.0f;
# ifdef GGML_SYCL_NVIDIA
SYCL_CHECK(CHECK_TRY_ERROR(oneapi::mkl::blas::column_major::gemm(
oneapi::mkl::backend_selector<oneapi::mkl::backend::cublas>{ *stream }, oneapi::mkl::transpose::trans,
src0_ddf_i, DnnlGemmWrapper::to_dt<float>(), dst_dd_i, DnnlGemmWrapper::to_dt<float>());
#endif
}
- (void) dst;
- (void) src1_ddq_i;
- (void) src1_padded_row_size;
+ GGML_UNUSED(dst);
+ GGML_UNUSED(src1_ddq_i);
+ GGML_UNUSED(src1_padded_row_size);
}
catch (sycl::exception const &exc) {
std::cerr << exc.what() << "Exception caught at file:" << __FILE__
item_ct1);
});
- (void) src1;
- (void) src1_dd;
+ GGML_UNUSED(src1);
+ GGML_UNUSED(src1_dd);
+ GGML_UNUSED(ctx);
}
inline void ggml_sycl_op_sum(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
sum_rows_f32_sycl(src0_dd, dst_dd, ne, 1, main_stream);
- (void) src1;
- (void) dst;
- (void) src1_dd;
+ GGML_UNUSED(src1);
+ GGML_UNUSED(dst);
+ GGML_UNUSED(src1_dd);
+ GGML_UNUSED(ctx);
}
inline void ggml_sycl_op_sum_rows(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
sum_rows_f32_sycl(src0_dd, dst_dd, ncols, nrows, main_stream);
- (void) src1;
- (void) dst;
- (void) src1_dd;
+ GGML_UNUSED(src1);
+ GGML_UNUSED(dst);
+ GGML_UNUSED(src1_dd);
+ GGML_UNUSED(ctx);
}
inline void ggml_sycl_op_argsort(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
argsort_f32_i32_sycl(src0_dd, (int *)dst_dd, ncols, nrows, order, main_stream);
- (void) src1;
- (void) dst;
- (void) src1_dd;
+ GGML_UNUSED(src1);
+ GGML_UNUSED(dst);
+ GGML_UNUSED(src1_dd);
+ GGML_UNUSED(ctx);
}
inline void ggml_sycl_op_argmax(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
argmax_f32_i32_sycl(src0_dd, (int *)dst_dd, ncols, nrows, main_stream);
- (void) src1;
- (void) dst;
- (void) src1_dd;
+ GGML_UNUSED(src1);
+ GGML_UNUSED(dst);
+ GGML_UNUSED(src1_dd);
+ GGML_UNUSED(ctx);
}
inline void ggml_sycl_op_diag_mask_inf(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
diag_mask_inf_f32_sycl(src0_dd, dst_dd, ne00, nrows0, ne01, n_past, main_stream);
- (void) src1;
- (void) dst;
- (void) src1_dd;
+ GGML_UNUSED(src1);
+ GGML_UNUSED(dst);
+ GGML_UNUSED(src1_dd);
+ GGML_UNUSED(ctx);
}
inline void ggml_sycl_op_scale(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
*/
SYCL_CHECK(0);
- (void) src1;
- (void) dst;
- (void) src1_dd;
+ GGML_UNUSED(src1);
+ GGML_UNUSED(dst);
+ GGML_UNUSED(src1_dd);
+ GGML_UNUSED(ctx);
}
inline void ggml_sycl_op_clamp(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
*/
SYCL_CHECK(0);
- (void) src1;
- (void) dst;
- (void) src1_dd;
+ GGML_UNUSED(src1);
+ GGML_UNUSED(dst);
+ GGML_UNUSED(src1_dd);
+ GGML_UNUSED(ctx);
}
static void ggml_sycl_set_peer_access(const int n_tokens, int main_device) {
ggml_tensor_extra_gpu * src0_extra = (ggml_tensor_extra_gpu *) src0->extra;
ggml_tensor_extra_gpu * src1_extra = (ggml_tensor_extra_gpu *) src1->extra;
- ggml_tensor_extra_gpu * dst_extra = (ggml_tensor_extra_gpu *) dst->extra;
const bool src0_is_contiguous = ggml_is_contiguous(src0);
const bool src1_is_contiguous = ggml_is_contiguous(src1);
GGML_TENSOR_BINARY_OP_LOCALS
- const int64_t ne_dst = ggml_nelements(dst);
SYCL_CHECK(ggml_sycl_set_device(ctx.device));
queue_ptr main_stream = ctx.stream();;
inline bool ggml_sycl_supports_mmq(enum ggml_type type) {
// TODO: accuracy issues in MMQ
+ GGML_UNUSED(type);
return false;
}
GGML_ABORT("fatal error");
}
- (void) dst;
+ GGML_UNUSED(dst);
}
catch (sycl::exception const &exc) {
std::cerr << exc.what() << "Exception caught at file:" << __FILE__
static void ggml_sycl_dup(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
// TODO: why do we pass dst as src1 here?
ggml_sycl_cpy(ctx, src0, dst, nullptr);
- (void) src1;
+ GGML_UNUSED(src1);
}
static void ggml_sycl_diag_mask_inf(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
}
static void ggml_sycl_nop(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
- (void) src0;
- (void) src1;
- (void) dst;
+ GGML_UNUSED(src0);
+ GGML_UNUSED(src1);
+ GGML_UNUSED(dst);
+ GGML_UNUSED(ctx);
}
void ggml_sycl_set_main_device(const int main_device) try {
- if (dpct::get_current_device_id() == main_device) return;
+ if (dpct::get_current_device_id() == static_cast<unsigned int> (main_device)) {
+ return;
+ }
check_allow_gpu_index(main_device);
dpct::select_device(main_device);
{
ggml_backend_sycl_context *sycl_ctx =
(ggml_backend_sycl_context *)backend->context;
+
sycl::event *sycl_event = static_cast<sycl::event *>(event->context);
const queue_ptr &stream = sycl_ctx->stream(sycl_ctx->device, 0);
}
static void ggml_backend_sycl_event_wait(ggml_backend_t backend, ggml_backend_event_t event) try {
- ggml_backend_sycl_context* sycl_ctx = static_cast<ggml_backend_sycl_context*>(backend->context);
+
sycl::event* sycl_event = static_cast<sycl::event*>(event->context);
if (ggml_backend_is_sycl(backend)) {
// SYCL doesn't support registering host memory, left here for reference
// "ggml_backend_register_host_buffer"
// "ggml_backend_unregister_host_buffer"
+ GGML_UNUSED(name);
return nullptr;
}
im2col_sycl(src1_dd, (float *)dst_dd, IW, IH, OW, OH, KW, KH, IC, batch, batch_offset, delta_offset, s0, s1, p0, p1, d0, d1, main_stream);
}
- (void) src0;
- (void) src0_dd;
+ GGML_UNUSED(src0);
+ GGML_UNUSED(src0_dd);
+ GGML_UNUSED(ctx);
}
x_ql[i * (WARP_SIZE + 1) + k] = get_int_from_uint8_aligned(bxi->qs, kqsx);
}
- const int blocks_per_tile_x_row = WARP_SIZE / QI4_K; // == 1 if QK_K == 256
+ constexpr int blocks_per_tile_x_row = QI4_K > WARP_SIZE ? 1 : WARP_SIZE / QI4_K; // == 1 if QK_K == 256
const int kbxd = k % blocks_per_tile_x_row; // == 0 if QK_K == 256
#pragma unroll
x_ql[i * (2*WARP_SIZE + 1) + kq1] = ql1 | qh1;
}
- const int blocks_per_tile_x_row = WARP_SIZE / QI5_K; // == 1 if QK_K == 256
+ constexpr int blocks_per_tile_x_row = QI5_K > WARP_SIZE ? 1 : WARP_SIZE / QI5_K; // == 1 if QK_K == 256
const int kbxd = k % blocks_per_tile_x_row; // == 0 if QK_K == 256
#pragma unroll
dpct::sub_sat());
}
- const int blocks_per_tile_x_row = WARP_SIZE / QI6_K; // == 1 if QK_K == 256
+ constexpr int blocks_per_tile_x_row = QI6_K > WARP_SIZE ? 1 : WARP_SIZE / QI6_K; // == 1 if QK_K == 256
const int kbxd = k % blocks_per_tile_x_row; // == 0 if QK_K == 256
float * x_dmf = (float *) x_dm;
break;
}
- (void) src1;
- (void) dst;
- (void) src1_ddf_i;
+ GGML_UNUSED(src1);
+ GGML_UNUSED(dst);
+ GGML_UNUSED(src1_ddf_i);
}
catch (sycl::exception const &exc) {
std::cerr << exc.what() << "Exception caught at file:" << __FILE__
const sycl::range<3> block_nums(1, 1, block_num_y);
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, QK_WARP_SIZE);
{
-
- stream->submit([&](sycl::handler &cgh) {
- auto iq2xs_grid_ptr_ct1 = &iq2xs_grid[0];
- auto ksigns64_ptr_ct1 = &ksigns64[0];
-
+ stream->submit([&](sycl::handler & cgh) {
cgh.parallel_for(
sycl::nd_range<3>(block_nums * block_dims, block_dims),
[=](sycl::nd_item<3> item_ct1)
{
stream->submit([&](sycl::handler &cgh) {
- auto iq2xs_grid_ptr_ct1 = &iq2xs_grid[0];
- auto ksigns64_ptr_ct1 = &ksigns64[0];
-
cgh.parallel_for(
sycl::nd_range<3>(block_nums * block_dims, block_dims),
[=](sycl::nd_item<3> item_ct1)
{
stream->submit([&](sycl::handler &cgh) {
- auto iq3xxs_grid_ptr_ct1 = &iq3xxs_grid[0];
- auto ksigns64_ptr_ct1 = &ksigns64[0];
-
cgh.parallel_for(
sycl::nd_range<3>(block_nums * block_dims, block_dims),
[=](sycl::nd_item<3> item_ct1)
{
stream->submit([&](sycl::handler &cgh) {
- auto iq3s_grid_ptr_ct1 = &iq3s_grid[0];
-
cgh.parallel_for(
sycl::nd_range<3>(block_nums * block_dims, block_dims),
[=](sycl::nd_item<3> item_ct1)
{
stream->submit([&](sycl::handler &cgh) {
- auto iq1s_grid_ptr_ct1 = &iq1s_grid_gpu[0];
- auto ksigns64_ptr_ct1 = &ksigns64[0];
-
cgh.parallel_for(
sycl::nd_range<3>(block_nums * block_dims, block_dims),
[=](sycl::nd_item<3> item_ct1)
const size_t q8_1_bs = QK8_1;
// the main device has a larger memory buffer to hold the results from all GPUs
// nrows_dst == nrows of the matrix that the kernel writes into
- const int64_t nrows_dst = id == ctx.device ? ne00 : row_diff;
+
for (int i = 0; i < src1_ncols; i++)
{
const size_t src1_ddq_i_offset = i * src1_padded_col_size * q8_1_ts / q8_1_bs;
break;
}
}
- (void) src1;
- (void) dst;
- (void) src1_ddf_i;
+ GGML_UNUSED(src1);
+ GGML_UNUSED(dst);
+ GGML_UNUSED(src1_ddf_i);
+ GGML_UNUSED(ctx);
}
*/
item_ct1.barrier(sycl::access::fence_space::local_space);
mean_var = 0.f;
- int nreduce = nwarps / WARP_SIZE;
+ size_t nreduce = nwarps / WARP_SIZE;
for (size_t i = 0; i < nreduce; i += 1)
{
mean_var += s_sum[lane_id + i * WARP_SIZE];
const int nthreads = item_ct1.get_local_range(2);
const int nwarps = nthreads / WARP_SIZE;
start += item_ct1.get_local_id(2);
- int nreduce = nwarps / WARP_SIZE;
+ size_t nreduce = nwarps / WARP_SIZE;
if (end >= ne_elements) {
end = ne_elements;
converged control flow. You may need to adjust the code.
*/
item_ct1.barrier(sycl::access::fence_space::local_space);
- int nreduce = nwarps / WARP_SIZE;
+ size_t nreduce = nwarps / WARP_SIZE;
tmp = 0.f;
for (size_t i = 0; i < nreduce; i += 1)
{
(void)src1;
(void)dst;
(void)src1_dd;
+ GGML_UNUSED(ctx);
}
void ggml_sycl_op_rms_norm(ggml_backend_sycl_context& ctx, const ggml_tensor* src0,
}
}
- (void) src1;
- (void) dst;
- (void) src1_dd;
+ GGML_UNUSED(src1);
+ GGML_UNUSED(dst);
+ GGML_UNUSED(src1_dd);
+ GGML_UNUSED(ctx);
}
const int lane_id = item_ct1.get_local_id(2) % WARP_SIZE;
const int nthreads = block_size;
const int nwarps = nthreads / WARP_SIZE;
- int nreduce = nwarps / WARP_SIZE;
+ size_t nreduce = nwarps / WARP_SIZE;
float slope = 1.0f;
// ALiBi
if (block_size > WARP_SIZE) {
if (warp_id == 0) {
buf[lane_id] = -INFINITY;
- for (size_t i = 1; i < nreduce; i += 1)
+ for (size_t i = 1; i < nreduce; i += 1) {
buf[lane_id + i * WARP_SIZE] = -INFINITY;
+ }
}
item_ct1.barrier(sycl::access::fence_space::local_space);
}
item_ct1.barrier(sycl::access::fence_space::local_space);
max_val = buf[lane_id];
- for (size_t i = 1; i < nreduce; i += 1)
- {
+ for (size_t i = 1; i < nreduce; i += 1) {
max_val = std::max(max_val, buf[lane_id + i * WARP_SIZE]);
}
max_val = warp_reduce_max(max_val, item_ct1);
item_ct1.barrier(sycl::access::fence_space::local_space);
if (warp_id == 0) {
buf[lane_id] = 0.f;
- for (size_t i = 1; i < nreduce; i += 1)
+ for (size_t i = 1; i < nreduce; i += 1) {
buf[lane_id + i * WARP_SIZE] = 0.f;
+ }
}
item_ct1.barrier(sycl::access::fence_space::local_space);
item_ct1.barrier(sycl::access::fence_space::local_space);
tmp = buf[lane_id];
- for (size_t i = 1; i < nreduce; i += 1)
- {
+ for (size_t i = 1; i < nreduce; i += 1) {
tmp += buf[lane_id + i * WARP_SIZE];
}
tmp = warp_reduce_sum(tmp, item_ct1);
const int max_period = dst->op_params[1];
timestep_embedding_f32_sycl(src0_d, dst_d, src0->ne[0], dst->nb[1], dim, max_period, stream);
+ GGML_UNUSED(src1);
}
float y = 0;
// Process in chunks of 4 for better vectorization
- sycl::float4 k4, r4, tf4, td4, s4, kv4;
+ sycl::float4 k4, r4, tf4, td4, s4;
#pragma unroll
for (int j = 0; j < head_size; j += 4) {
// Load data in vec4 chunks
);
});
});
+
+ GGML_UNUSED(src0);
+ GGML_UNUSED(src1);
}