ggml_tensor *tensor) try {
ggml_backend_sycl_buffer_context * ctx = (ggml_backend_sycl_buffer_context *)buffer->context;
- if (tensor->view_src != NULL && tensor->view_offs == 0) {
+ if (tensor->view_src != NULL) {
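+ // tensor views share their parent's allocation, so there is nothing to initialize here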
assert(tensor->view_src->buffer->buft == buffer->buft);
- tensor->backend = tensor->view_src->backend;
- tensor->extra = tensor->view_src->extra;
return;
}
auto dev_count = ggml_backend_sycl_get_device_count();
if (device>=dev_count or device<0) {
- printf("ggml_backend_sycl_buffer_type error: device_index:%d is out of range [0, %d], miss to call ggml_backend_sycl_set_single_device()\n",
+ GGML_LOG_ERROR("ggml_backend_sycl_buffer_type error: device_index:%d is out of range [0, %d], miss to call ggml_backend_sycl_set_single_device()\n",
device, dev_count-1);
GGML_ASSERT(device<dev_count);
}
int device = ctx->device;
if (device>=ggml_sycl_info().device_count or device<0) {
- printf("ggml_backend_sycl_buffer_type error: device_index:%d is out of range [0, %d], miss to call ggml_backend_sycl_set_single_device()\n",
+ GGML_LOG_ERROR("ggml_backend_sycl_buffer_type error: device_index:%d is out of range [0, %d], miss to call ggml_backend_sycl_set_single_device()\n",
device, ggml_sycl_info().device_count-1);
GGML_ASSERT(device<ggml_sycl_info().device_count);
}
size += ggml_row_size(tensor->type, MATRIX_ROW_PADDING - ne0 % MATRIX_ROW_PADDING);
}
- // FIXME: do not crash if cudaMalloc fails
+ // FIXME: do not crash if the SYCL buffer allocation fails
// currently, init_tensor cannot fail, it needs to be fixed in ggml-backend first
ggml_sycl_set_device(i);
const queue_ptr stream = ctx->streams[i];
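+ // pre-allocate per-device, per-stream events for later cross-device synchronization of this split tensor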
SYCL_CHECK(CHECK_TRY_ERROR(extra->events[i][is] = new sycl::event()));
}
}
- tensor->backend = GGML_BACKEND_TYPE_GPU_SPLIT;
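+ // split placement is now inferred from the buffer type, so only the extra pointer is stored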
tensor->extra = extra;
}
catch (sycl::exception const &exc) {
dpct::memcpy_direction kind;
char * src_ptr;
- if (src->backend == GGML_BACKEND_TYPE_CPU) {
+ if (ggml_backend_buffer_is_host(src->buffer)) {
kind = dpct::host_to_device;
+ //GGML_SYCL_DEBUG("%s: Host buffer type src tensor\n", __func__);
src_ptr = (char *) src->data;
// GGML_SYCL_DEBUG("ggml_sycl_cpy_tensor_2d GGML_BACKEND_TYPE_CPU src_ptr %p\n", src_ptr);
- } else if (src->backend == GGML_BACKEND_TYPE_GPU || src->backend == GGML_BACKEND_TYPE_GPU_SPLIT) {
- GGML_ASSERT(src->backend != GGML_BACKEND_TYPE_GPU_SPLIT || (i1_low == 0 && i1_high == src->ne[1]));
+ } else if (ggml_backend_buffer_is_sycl(src->buffer)) {
+ // If buffer is a SYCL buffer
+ //GGML_SYCL_DEBUG("%s: SYCL buffer type src tensor\n", __func__);
+ kind = dpct::device_to_device;
+ src_ptr = (char *) src->data;
+ } else if (ggml_backend_buffer_is_sycl_split(src->buffer)) {
+ // If buffer is a SYCL split buffer
+ //GGML_SYCL_DEBUG("%s: Split buffer type src tensor\n", __func__);
+ GGML_ASSERT(i1_low == 0 && i1_high == src->ne[1]);
kind = dpct::device_to_device;
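+ // per-device data pointers for the split tensor are stored in its extra field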
ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) src->extra;
int id;
const int nb2 = dst->nb[2];
const int nb3 = dst->nb[3];
- GGML_ASSERT(dst->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
- GGML_ASSERT(src1->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
+ GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
+ GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(src1->buffer));
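+ // only src0 (the weight matrix) may live in a split buffer on this path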
GGML_ASSERT(src1->type == GGML_TYPE_F32 || (src1->ne[2] == 1 && src1->ne[3] == 1));
GGML_ASSERT(ne12 >= ne02 && ne12 % ne02 == 0);
int64_t src1_padded_col_size = GGML_PAD(ne10, MATRIX_ROW_PADDING);
- const bool split = src0->backend == GGML_BACKEND_TYPE_GPU_SPLIT;
+ const bool split = ggml_backend_buffer_is_sycl_split(src0->buffer);
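+ // a split src0 has its rows distributed across the available SYCL devices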
GGML_ASSERT(!(split && ne02 > 1));
GGML_ASSERT(!(split && ne03 > 1));
GGML_ASSERT(!(split && ne02 < ne12));
const ggml_tensor *src1,
ggml_tensor *dst) try {
GGML_ASSERT(ggml_is_permuted(src0) && ggml_is_permuted(src1));
- GGML_ASSERT(src0->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
+ GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(src0->buffer));
GGML_ASSERT(src0->nb[0] <= src0->nb[1] && src0->nb[2] <= src0->nb[3]); // 0213 permutation
GGML_ASSERT(src1->nb[0] <= src1->nb[1] && src1->nb[2] <= src1->nb[3]); // 0213 permutation
GGML_ASSERT(src0->type == GGML_TYPE_F16);
GGML_ASSERT(!ggml_is_transposed(src0));
GGML_ASSERT(!ggml_is_transposed(src1));
GGML_ASSERT(!ggml_is_permuted(src0));
- GGML_ASSERT(src0->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
+ GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(src0->buffer));
GGML_ASSERT(src0->type == GGML_TYPE_F16);
GGML_ASSERT(src1->type == GGML_TYPE_F32);
ggml_tensor *dst) try {
GGML_ASSERT(!ggml_is_transposed(src0));
GGML_ASSERT(!ggml_is_transposed(src1));
- GGML_ASSERT(src0->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
+ GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(src0->buffer));
GGML_ASSERT(src0->type == GGML_TYPE_F16);
GGML_TENSOR_BINARY_OP_LOCALS
static void *ggml_backend_sycl_reg_get_proc_address(ggml_backend_reg_t reg, const char *name) {
GGML_UNUSED(reg);
- // TODO: update to the current function signature
- //if (strcmp(name, "ggml_backend_split_buffer_type") == 0) {
- // return (void *)ggml_backend_sycl_split_buffer_type;
- //}
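+ // expose the SYCL split buffer type through the generic proc-address lookup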
+ if (strcmp(name, "ggml_backend_split_buffer_type") == 0) {
+ return (void *)ggml_backend_sycl_split_buffer_type;
+ }
// SYCL doesn't support registering host memory, left here for reference
// "ggml_backend_register_host_buffer"