int ixj = col ^ j;
if (ixj > col) {
if ((col & k) == 0) {
- if (order == GGML_SORT_ASC ? x_row[dst_row[col]] > x_row[dst_row[ixj]] : x_row[dst_row[col]] < x_row[dst_row[ixj]]) {
+ if (order == GGML_SORT_ORDER_ASC ? x_row[dst_row[col]] > x_row[dst_row[ixj]] : x_row[dst_row[col]] < x_row[dst_row[ixj]]) {
swap(dst_row[col], dst_row[ixj]);
}
} else {
- if (order == GGML_SORT_ASC ? x_row[dst_row[col]] < x_row[dst_row[ixj]] : x_row[dst_row[col]] > x_row[dst_row[ixj]]) {
+ if (order == GGML_SORT_ORDER_ASC ? x_row[dst_row[col]] < x_row[dst_row[ixj]] : x_row[dst_row[col]] > x_row[dst_row[ixj]]) {
swap(dst_row[col], dst_row[ixj]);
}
}
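// For reference, the compare-and-swap above is one step of a bitonic sorting
// network applied to an index array. A minimal host-side C++ sketch of the same
// argsort logic (illustration only, not part of this patch; `bitonic_argsort`
// and `asc` are made-up names, and ncols is assumed to be a power of two):
#include <utility>
#include <vector>

static std::vector<int> bitonic_argsort(const float * x, int ncols, bool asc) {
    std::vector<int> idx(ncols);
    for (int i = 0; i < ncols; ++i) idx[i] = i;

    for (int k = 2; k <= ncols; k *= 2) {           // size of the bitonic merge
        for (int j = k / 2; j > 0; j /= 2) {        // compare distance
            for (int col = 0; col < ncols; ++col) { // one GPU thread per col in the kernel
                const int ixj = col ^ j;
                if (ixj <= col) continue;
                const float a = x[idx[col]];
                const float b = x[idx[ixj]];
                const bool need_swap = ((col & k) == 0)
                    ? (asc ? a > b : a < b)   // ascending sub-sequence
                    : (asc ? a < b : a > b);  // descending sub-sequence
                if (need_swap) {
                    std::swap(idx[col], idx[ixj]);
                }
            }
        }
    }
    return idx;
}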
const dim3 block_dims(ncols, 1, 1);
const dim3 block_nums(1, nrows, 1);
- if (order == GGML_SORT_ASC) {
- k_argsort_f32_i32<GGML_SORT_ASC><<<block_nums, block_dims, 0, stream>>>(x, dst, ncols);
- } else if (order == GGML_SORT_DESC) {
- k_argsort_f32_i32<GGML_SORT_DESC><<<block_nums, block_dims, 0, stream>>>(x, dst, ncols);
+ if (order == GGML_SORT_ORDER_ASC) {
+ k_argsort_f32_i32<GGML_SORT_ORDER_ASC><<<block_nums, block_dims, 0, stream>>>(x, dst, ncols);
+ } else if (order == GGML_SORT_ORDER_DESC) {
+ k_argsort_f32_i32<GGML_SORT_ORDER_DESC><<<block_nums, block_dims, 0, stream>>>(x, dst, ncols);
} else {
GGML_ASSERT(false);
}
cudaMemcpyKind kind;
char * src_ptr;
- if (src->backend == GGML_BACKEND_CPU) {
+ if (src->backend == GGML_BACKEND_TYPE_CPU) {
kind = cudaMemcpyHostToDevice;
src_ptr = (char *) src->data;
- } else if (src->backend == GGML_BACKEND_GPU || src->backend == GGML_BACKEND_GPU_SPLIT) {
- GGML_ASSERT(src->backend != GGML_BACKEND_GPU_SPLIT || (i1_low == 0 && i1_high == src->ne[1]));
+ } else if (src->backend == GGML_BACKEND_TYPE_GPU || src->backend == GGML_BACKEND_TYPE_GPU_SPLIT) {
+ GGML_ASSERT(src->backend != GGML_BACKEND_TYPE_GPU_SPLIT || (i1_low == 0 && i1_high == src->ne[1]));
kind = cudaMemcpyDeviceToDevice;
ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) src->extra;
int id;
// the main device has a larger memory buffer to hold the results from all GPUs
// nrows_dst == nrows of the matrix that the kernel writes into
- const int64_t nrows_dst = dst->backend == GGML_BACKEND_GPU && id == g_main_device ? ne0 : row_diff;
+ const int64_t nrows_dst = dst->backend == GGML_BACKEND_TYPE_GPU && id == g_main_device ? ne0 : row_diff;
switch (src0->type) {
case GGML_TYPE_Q4_0:
// the main device has a larger memory buffer to hold the results from all GPUs
// nrows_dst == nrows of the matrix that the kernel writes into
- const int64_t nrows_dst = dst->backend == GGML_BACKEND_GPU && id == g_main_device ? ne0 : row_diff;
+ const int64_t nrows_dst = dst->backend == GGML_BACKEND_TYPE_GPU && id == g_main_device ? ne0 : row_diff;
switch (src0->type) {
case GGML_TYPE_Q4_0:
// the main device has a larger memory buffer to hold the results from all GPUs
// ldc == nrows of the matrix that cuBLAS writes into
- int ldc = dst->backend == GGML_BACKEND_GPU && id == g_main_device ? ne0 : row_diff;
+ int ldc = dst->backend == GGML_BACKEND_TYPE_GPU && id == g_main_device ? ne0 : row_diff;
const int compute_capability = g_device_caps[id].cc;
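// The nrows_dst/ldc selection above is column-major leading-dimension bookkeeping:
// when this device writes its row slice straight into the main device's full-size
// result buffer, consecutive columns are ne0 elements apart; a compact per-device
// buffer only needs row_diff. A self-contained toy (all sizes hypothetical):
#include <cstdio>
#include <vector>

int main() {
    const int ne0 = 8, ncols = 3;        // full result: ne0 rows per column, column-major
    const int row_low = 2, row_diff = 4; // rows owned by this device
    std::vector<float> full(ne0 * ncols, 0.0f);

    const int ldc = ne0; // column stride when writing into the full buffer
    for (int c = 0; c < ncols; ++c) {
        for (int r = 0; r < row_diff; ++r) {
            full[(row_low + r) + c*ldc] = 100.0f*c + r; // element (row_low + r, c)
        }
    }
    // a compact per-device staging buffer would use ldc == row_diff instead
    printf("first owned row of column 1: %.0f\n", full[row_low + 1*ldc]); // prints 100
    return 0;
}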
const bool use_src2 = src2 != nullptr;
if (use_src2) {
- const bool src2_on_device = src2->backend == GGML_BACKEND_GPU;
+ const bool src2_on_device = src2->backend == GGML_BACKEND_TYPE_GPU;
if (src2_on_device) {
ggml_tensor_extra_gpu * src2_extra = (ggml_tensor_extra_gpu *) src2->extra;
const bool use_src1 = src1 != nullptr;
const int64_t nrows1 = use_src1 ? ggml_nrows(src1) : 1;
- GGML_ASSERT(!use_src1 || src1->backend != GGML_BACKEND_GPU_SPLIT);
- GGML_ASSERT( dst->backend != GGML_BACKEND_GPU_SPLIT);
+ GGML_ASSERT(!use_src1 || src1->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
+ GGML_ASSERT( dst->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
ggml_tensor_extra_gpu * src0_extra = (ggml_tensor_extra_gpu *) src0->extra;
ggml_tensor_extra_gpu * src1_extra = use_src1 ? (ggml_tensor_extra_gpu *) src1->extra : nullptr;
ggml_tensor_extra_gpu * dst_extra = (ggml_tensor_extra_gpu *) dst->extra;
- const bool src0_on_device = src0->backend == GGML_BACKEND_GPU || src0->backend == GGML_BACKEND_GPU_SPLIT;
- const bool src1_on_device = use_src1 && src1->backend == GGML_BACKEND_GPU;
- const bool dst_on_device = dst->backend == GGML_BACKEND_GPU;
+ const bool src0_on_device = src0->backend == GGML_BACKEND_TYPE_GPU || src0->backend == GGML_BACKEND_TYPE_GPU_SPLIT;
+ const bool src1_on_device = use_src1 && src1->backend == GGML_BACKEND_TYPE_GPU;
+ const bool dst_on_device = dst->backend == GGML_BACKEND_TYPE_GPU;
// dd = data device
float * src0_ddf = nullptr;
CUDA_CHECK(cudaMemcpyAsync(dst->data, dst_ddf, ggml_nbytes(dst), cudaMemcpyDeviceToHost, main_stream));
}
- if (dst->backend == GGML_BACKEND_CPU) {
+ if (dst->backend == GGML_BACKEND_TYPE_CPU) {
CUDA_CHECK(cudaDeviceSynchronize());
}
}
const int nb2 = dst->nb[2];
const int nb3 = dst->nb[3];
- GGML_ASSERT(dst->backend != GGML_BACKEND_GPU_SPLIT);
- GGML_ASSERT(src1->backend != GGML_BACKEND_GPU_SPLIT);
+ GGML_ASSERT(dst->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
+ GGML_ASSERT(src1->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
GGML_ASSERT(src1->type == GGML_TYPE_F32 || (src1->ne[2] == 1 && src1->ne[3] == 1));
GGML_ASSERT(ne12 >= ne02 && ne12 % ne02 == 0);
ggml_tensor_extra_gpu * src1_extra = (ggml_tensor_extra_gpu *) src1->extra;
ggml_tensor_extra_gpu * dst_extra = (ggml_tensor_extra_gpu *) dst->extra;
- const bool src0_on_device = src0->backend == GGML_BACKEND_GPU || src0->backend == GGML_BACKEND_GPU_SPLIT;
+ const bool src0_on_device = src0->backend == GGML_BACKEND_TYPE_GPU || src0->backend == GGML_BACKEND_TYPE_GPU_SPLIT;
const bool src0_is_contiguous = ggml_is_contiguous(src0);
const bool src1_is_contiguous = ggml_is_contiguous(src1);
const int64_t src1_padded_col_size = GGML_PAD(ne10, MATRIX_ROW_PADDING);
- const bool split = src0->backend == GGML_BACKEND_GPU_SPLIT;
+ const bool split = src0->backend == GGML_BACKEND_TYPE_GPU_SPLIT;
GGML_ASSERT(!(split && ne02 > 1));
GGML_ASSERT(!(split && ne03 > 1));
GGML_ASSERT(!(split && ne02 < ne12));
std::array<float, GGML_CUDA_MAX_DEVICES> tensor_split;
if (split) {
- // TODO: check that src0->buffer->buft is a split buffer type, replace GGML_BACKEND_GPU_SPLIT check
+ // TODO: check that src0->buffer->buft is a split buffer type, replace GGML_BACKEND_TYPE_GPU_SPLIT check
// GGML_ASSERT(src0->buffer != nullptr && src0->buffer->buft == ...);
ggml_backend_cuda_split_buffer_type_context * buft_ctx = (ggml_backend_cuda_split_buffer_type_context *) src0->buffer->buft->context;
tensor_split = buft_ctx->tensor_split;
used_devices++;
- const bool src1_on_device = src1->backend == GGML_BACKEND_GPU && id == g_main_device;
- const bool dst_on_device = dst->backend == GGML_BACKEND_GPU && id == g_main_device;
+ const bool src1_on_device = src1->backend == GGML_BACKEND_TYPE_GPU && id == g_main_device;
+ const bool dst_on_device = dst->backend == GGML_BACKEND_TYPE_GPU && id == g_main_device;
ggml_cuda_set_device(id);
cudaStream_t stream = g_cudaStreams[id][0];
continue;
}
- const bool src1_on_device = src1->backend == GGML_BACKEND_GPU && id == g_main_device;
- const bool dst_on_device = dst->backend == GGML_BACKEND_GPU && id == g_main_device;
+ const bool src1_on_device = src1->backend == GGML_BACKEND_TYPE_GPU && id == g_main_device;
+ const bool dst_on_device = dst->backend == GGML_BACKEND_TYPE_GPU && id == g_main_device;
const int64_t row_diff = dev[id].row_high - dev[id].row_low;
ggml_cuda_set_device(id);
// the main device memory buffer can be on VRAM scratch, with space for all partial results
// in that case an offset on dst_ddf_i is needed
- if (dst->backend == GGML_BACKEND_GPU && id == g_main_device) {
+ if (dst->backend == GGML_BACKEND_TYPE_GPU && id == g_main_device) {
dst_dd_i += dev[id].row_low; // offset is 0 if no tensor split
}
// copy src0, src1 to device if necessary
- if (src1->backend == GGML_BACKEND_GPU && src1_is_contiguous) {
+ if (src1->backend == GGML_BACKEND_TYPE_GPU && src1_is_contiguous) {
if (id != g_main_device) {
if (convert_src1_to_q8_1) {
char * src1_ddq_i_source = dev[g_main_device].src1_ddq + src1_ddq_i_offset;
src1_ncols*ne10*sizeof(float), stream));
}
}
- } else if (src1->backend == GGML_BACKEND_CPU || (src1_on_device && !src1_is_contiguous)) {
+ } else if (src1->backend == GGML_BACKEND_TYPE_CPU || (src1_on_device && !src1_is_contiguous)) {
CUDA_CHECK(ggml_cuda_cpy_tensor_2d(
src1_ddf_i, src1, i03, i02, src1_col_0, src1_col_0+src1_ncols, stream));
} else {
GGML_ASSERT(false);
}
- if (convert_src1_to_q8_1 && (src1->backend == GGML_BACKEND_CPU || !src1_is_contiguous)) {
+ if (convert_src1_to_q8_1 && (src1->backend == GGML_BACKEND_TYPE_CPU || !src1_is_contiguous)) {
quantize_row_q8_1_cuda(src1_ddf_i, src1_ddq_i, ne10, src1_ncols, src1_padded_col_size, stream);
CUDA_CHECK(cudaGetLastError());
}
if (!dst_on_device) {
void * dst_off_device;
cudaMemcpyKind kind;
- if (dst->backend == GGML_BACKEND_CPU) {
+ if (dst->backend == GGML_BACKEND_TYPE_CPU) {
dst_off_device = dst->data;
kind = cudaMemcpyDeviceToHost;
- } else if (dst->backend == GGML_BACKEND_GPU) {
+ } else if (dst->backend == GGML_BACKEND_TYPE_GPU) {
dst_off_device = dst_extra->data_device[g_main_device];
kind = cudaMemcpyDeviceToDevice;
} else {
}
}
- if (dst->backend == GGML_BACKEND_CPU) {
+ if (dst->backend == GGML_BACKEND_TYPE_CPU) {
ggml_cuda_set_device(g_main_device);
CUDA_CHECK(cudaDeviceSynchronize());
}
static void ggml_cuda_mul_mat_vec_p021(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst){
GGML_ASSERT(ggml_is_permuted(src0) && ggml_is_permuted(src1));
- GGML_ASSERT(src0->backend != GGML_BACKEND_GPU_SPLIT);
+ GGML_ASSERT(src0->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
GGML_ASSERT(src0->nb[0] <= src0->nb[1] && src0->nb[2] <= src0->nb[3]); // 0213 permutation
GGML_ASSERT(src1->nb[0] <= src1->nb[1] && src1->nb[2] <= src1->nb[3]); // 0213 permutation
GGML_ASSERT(src0->type == GGML_TYPE_F16);
GGML_ASSERT(!ggml_is_transposed(src0));
GGML_ASSERT(!ggml_is_transposed(src1));
GGML_ASSERT(!ggml_is_permuted(src0));
- GGML_ASSERT(src0->backend != GGML_BACKEND_GPU_SPLIT);
+ GGML_ASSERT(src0->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
GGML_ASSERT(src0->type == GGML_TYPE_F16);
GGML_ASSERT(src1->type == GGML_TYPE_F32);
GGML_ASSERT(!ggml_is_transposed(src0));
GGML_ASSERT(!ggml_is_transposed(src1));
- GGML_ASSERT(src0->backend != GGML_BACKEND_GPU_SPLIT);
+ GGML_ASSERT(src0->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
GGML_ASSERT(src0->type == GGML_TYPE_F16);
GGML_TENSOR_BINARY_OP_LOCALS
static void ggml_cuda_mul_mat(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
const bool all_on_device =
- (src0->backend == GGML_BACKEND_GPU || src0->backend == GGML_BACKEND_GPU_SPLIT) &&
- (src1->backend == GGML_BACKEND_GPU) &&
- ( dst->backend == GGML_BACKEND_GPU);
+ (src0->backend == GGML_BACKEND_TYPE_GPU || src0->backend == GGML_BACKEND_TYPE_GPU_SPLIT) &&
+ (src1->backend == GGML_BACKEND_TYPE_GPU) &&
+ ( dst->backend == GGML_BACKEND_TYPE_GPU);
- const bool split = src0->backend == GGML_BACKEND_GPU_SPLIT;
+ const bool split = src0->backend == GGML_BACKEND_TYPE_GPU_SPLIT;
int64_t min_compute_capability = INT_MAX;
GGML_ASSERT(!ggml_is_transposed(src00));
GGML_ASSERT(!ggml_is_transposed(src1));
- GGML_ASSERT(src00->backend != GGML_BACKEND_GPU_SPLIT);
+ GGML_ASSERT(src00->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
GGML_ASSERT(src1->type == GGML_TYPE_F32);
const int64_t ne00 = src00->ne[0]; GGML_UNUSED(ne00);
cudaStream_t stream = g_cudaStreams[g_main_device][0];
- if (ids->backend == GGML_BACKEND_GPU) {
+ if (ids->backend == GGML_BACKEND_TYPE_GPU) {
const char * ids_dev = (const char *)((const ggml_tensor_extra_gpu *)ids->extra)->data_device[g_main_device];
CUDA_CHECK(cudaMemcpyAsync(ids_host.data(), ids_dev, ggml_nbytes(ids), cudaMemcpyDeviceToHost, stream));
CUDA_CHECK(cudaStreamSynchronize(stream));
ggml_tensor src1_row = *src1;
ggml_tensor dst_row = *dst;
- src1_row.backend = GGML_BACKEND_GPU;
- dst_row.backend = GGML_BACKEND_GPU;
+ src1_row.backend = GGML_BACKEND_TYPE_GPU;
+ dst_row.backend = GGML_BACKEND_TYPE_GPU;
src1_row.extra = &src1_row_extra;
dst_row.extra = &dst_row_extra;
- char * src1_original = src1->backend == GGML_BACKEND_CPU ?
+ char * src1_original = src1->backend == GGML_BACKEND_TYPE_CPU ?
(char *) src1->data : (char *) src1_extra->data_device[g_main_device];
- char * dst_original = dst->backend == GGML_BACKEND_CPU ?
+ char * dst_original = dst->backend == GGML_BACKEND_TYPE_CPU ?
(char *) dst->data : (char *) dst_extra->data_device[g_main_device];
if (src1->ne[1] == 1) {
- GGML_ASSERT(src1->backend == GGML_BACKEND_GPU);
- GGML_ASSERT(dst->backend == GGML_BACKEND_GPU);
+ GGML_ASSERT(src1->backend == GGML_BACKEND_TYPE_GPU);
+ GGML_ASSERT(dst->backend == GGML_BACKEND_TYPE_GPU);
for (int64_t i01 = 0; i01 < ids->ne[1]; i01++) {
//int32_t row_id;
src1_row_extra.data_device[g_main_device] = src1_contiguous.get();
dst_row_extra.data_device[g_main_device] = dst_contiguous.get();
- const cudaMemcpyKind src1_kind = src1->backend == GGML_BACKEND_CPU ?
+ const cudaMemcpyKind src1_kind = src1->backend == GGML_BACKEND_TYPE_CPU ?
cudaMemcpyHostToDevice : cudaMemcpyDeviceToDevice;
- const cudaMemcpyKind dst_kind = dst->backend == GGML_BACKEND_CPU ?
+ const cudaMemcpyKind dst_kind = dst->backend == GGML_BACKEND_TYPE_CPU ?
cudaMemcpyDeviceToHost : cudaMemcpyDeviceToDevice;
for (int32_t row_id = 0; row_id < n_as; ++row_id) {
}
}
- if (dst->backend == GGML_BACKEND_CPU) {
+ if (dst->backend == GGML_BACKEND_TYPE_CPU) {
CUDA_CHECK(cudaStreamSynchronize(stream));
}
}
const int64_t ne = ggml_nelements(src0);
GGML_ASSERT(ne == ggml_nelements(src1));
- GGML_ASSERT(src0->backend == GGML_BACKEND_GPU);
- GGML_ASSERT(src1->backend == GGML_BACKEND_GPU);
+ GGML_ASSERT(src0->backend == GGML_BACKEND_TYPE_GPU);
+ GGML_ASSERT(src1->backend == GGML_BACKEND_TYPE_GPU);
GGML_ASSERT(ggml_nbytes(src0) <= INT_MAX);
GGML_ASSERT(ggml_nbytes(src1) <= INT_MAX);
if (!g_cublas_loaded) return false;
ggml_cuda_func_t func;
- const bool any_on_device = tensor->backend == GGML_BACKEND_GPU
- || (tensor->src[0] != nullptr && (tensor->src[0]->backend == GGML_BACKEND_GPU || tensor->src[0]->backend == GGML_BACKEND_GPU_SPLIT))
- || (tensor->src[1] != nullptr && tensor->src[1]->backend == GGML_BACKEND_GPU);
+ const bool any_on_device = tensor->backend == GGML_BACKEND_TYPE_GPU
+ || (tensor->src[0] != nullptr && (tensor->src[0]->backend == GGML_BACKEND_TYPE_GPU || tensor->src[0]->backend == GGML_BACKEND_TYPE_GPU_SPLIT))
+ || (tensor->src[1] != nullptr && tensor->src[1]->backend == GGML_BACKEND_TYPE_GPU);
if (!any_on_device && tensor->op != GGML_OP_MUL_MAT && tensor->op != GGML_OP_MUL_MAT_ID) {
return false;
return false;
}
- if (tensor->src[0] != nullptr && tensor->src[0]->backend == GGML_BACKEND_GPU_SPLIT) {
+ if (tensor->src[0] != nullptr && tensor->src[0]->backend == GGML_BACKEND_TYPE_GPU_SPLIT) {
ggml_cuda_set_peer_access(tensor->src[1]->ne[1]);
}
if (params->ith != 0) {
return true;
}
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return true;
}
func(tensor->src[0], tensor->src[1], tensor);
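// The gating above (repeated later for the SYCL and Vulkan backends) means a GPU op
// only does real work on CPU thread 0 during the compute phase; INIT and FINALIZE are
// no-ops. A stand-alone sketch with stand-in names (not the actual ggml structs):
#include <cstdio>

enum task_type { TASK_INIT, TASK_COMPUTE, TASK_FINALIZE }; // mirrors GGML_TASK_TYPE_*

static bool compute_forward(task_type type, int ith) {
    if (ith != 0) {
        return true; // secondary CPU threads have nothing to do for a GPU op
    }
    if (type == TASK_INIT || type == TASK_FINALIZE) {
        return true; // only the compute phase launches device work
    }
    printf("launch device kernel here\n"); // placeholder for func(src0, src1, dst)
    return true;
}

int main() {
    compute_forward(TASK_INIT,    0); // no-op
    compute_forward(TASK_COMPUTE, 3); // no-op, wrong thread
    compute_forward(TASK_COMPUTE, 0); // dispatches
    return 0;
}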
extra->data_device[ctx->device] = tensor->data;
- tensor->backend = GGML_BACKEND_GPU;
+ tensor->backend = GGML_BACKEND_TYPE_GPU;
tensor->extra = extra;
if (ggml_is_quantized(tensor->type)) {
}
GGML_CALL static void ggml_backend_cuda_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
- GGML_ASSERT(tensor->backend == GGML_BACKEND_GPU);
+ GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_GPU);
ggml_backend_cuda_buffer_context * ctx = (ggml_backend_cuda_buffer_context *)buffer->context;
}
GGML_CALL static void ggml_backend_cuda_buffer_get_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * tensor, void * data, size_t offset, size_t size) {
- GGML_ASSERT(tensor->backend == GGML_BACKEND_GPU);
+ GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_GPU);
ggml_backend_cuda_buffer_context * ctx = (ggml_backend_cuda_buffer_context *)buffer->context;
CUDA_CHECK(cudaEventCreateWithFlags(&extra->events[id][is], cudaEventDisableTiming));
}
}
- tensor->backend = GGML_BACKEND_GPU_SPLIT;
+ tensor->backend = GGML_BACKEND_TYPE_GPU_SPLIT;
tensor->extra = extra;
}
ggml_backend_cuda_context * cuda_ctx = (ggml_backend_cuda_context *)backend->context;
GGML_ASSERT(tensor->buffer->buft == ggml_backend_cuda_buffer_type(cuda_ctx->device) && "unsupported buffer type");
- GGML_ASSERT(tensor->backend == GGML_BACKEND_GPU);
+ GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_GPU);
CUDA_CHECK(cudaMemcpyAsync((char *)tensor->data + offset, data, size, cudaMemcpyHostToDevice, g_cudaStreams[cuda_ctx->device][0]));
}
ggml_backend_cuda_context * cuda_ctx = (ggml_backend_cuda_context *)backend->context;
GGML_ASSERT(tensor->buffer->buft == ggml_backend_cuda_buffer_type(cuda_ctx->device) && "unsupported buffer type");
- GGML_ASSERT(tensor->backend == GGML_BACKEND_GPU);
+ GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_GPU);
CUDA_CHECK(cudaMemcpyAsync(data, (const char *)tensor->data + offset, size, cudaMemcpyDeviceToHost, g_cudaStreams[cuda_ctx->device][0]));
}
ggml_cuda_set_main_device(cuda_ctx->device);
ggml_compute_params params = {};
- params.type = GGML_TASK_COMPUTE;
+ params.type = GGML_TASK_TYPE_COMPUTE;
params.ith = 0;
for (int i = 0; i < cgraph->n_nodes; i++) {
ggml_tensor * node = cgraph->nodes[i];
}
#ifndef NDEBUG
- assert(node->backend == GGML_BACKEND_GPU || node->backend == GGML_BACKEND_GPU_SPLIT);
+ assert(node->backend == GGML_BACKEND_TYPE_GPU || node->backend == GGML_BACKEND_TYPE_GPU_SPLIT);
assert(node->buffer->buft == ggml_backend_cuda_buffer_type(cuda_ctx->device));
assert(node->extra != nullptr);
for (int j = 0; j < GGML_MAX_SRC; j++) {
if (node->src[j] != nullptr) {
- assert(node->src[j]->backend == GGML_BACKEND_GPU || node->src[j]->backend == GGML_BACKEND_GPU_SPLIT);
+ assert(node->src[j]->backend == GGML_BACKEND_TYPE_GPU || node->src[j]->backend == GGML_BACKEND_TYPE_GPU_SPLIT);
assert(node->src[j]->buffer->buft == ggml_backend_cuda_buffer_type(cuda_ctx->device) || ggml_backend_buffer_is_cuda_split(node->src[j]->buffer));
assert(node->src[j]->extra != nullptr);
}
id<MTLComputePipelineState> pipeline = nil;
switch (order) {
- case GGML_SORT_ASC: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_ARGSORT_F32_I32_ASC].pipeline; break;
- case GGML_SORT_DESC: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_ARGSORT_F32_I32_DESC].pipeline; break;
+ case GGML_SORT_ORDER_ASC: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_ARGSORT_F32_I32_ASC].pipeline; break;
+ case GGML_SORT_ORDER_DESC: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_ARGSORT_F32_I32_DESC].pipeline; break;
default: GGML_ASSERT(false);
};
}
void ggml_cl_free_data(const struct ggml_tensor* tensor) {
- if (tensor->backend != GGML_BACKEND_GPU) {
+ if (tensor->backend != GGML_BACKEND_TYPE_GPU) {
return;
}
}
static void ggml_cl_mul_f32(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
- GGML_ASSERT(src1->backend == GGML_BACKEND_GPU);
+ GGML_ASSERT(src1->backend == GGML_BACKEND_TYPE_GPU);
const int64_t ne00 = src0->ne[0];
const int64_t ne01 = src0->ne[1];
const int64_t ne02 = src0->ne[2];
}
static void ggml_cl_add_f32(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
- GGML_ASSERT(src1->backend == GGML_BACKEND_GPU);
+ GGML_ASSERT(src1->backend == GGML_BACKEND_TYPE_GPU);
const int64_t ne00 = src0->ne[0];
const int64_t ne01 = src0->ne[1];
const int64_t ne02 = src0->ne[2];
size_t y_size;
size_t d_size;
cl_mem d_X;
- if (src0->backend == GGML_BACKEND_GPU) { // NOLINT
+ if (src0->backend == GGML_BACKEND_TYPE_GPU) { // NOLINT
d_X = (cl_mem) src0->extra;
} else {
d_X = ggml_cl_pool_malloc(sizeof(float) * x_ne, &x_size);
}
- cl_mem d_Y = src1->backend == GGML_BACKEND_GPU ? (cl_mem) src1->extra : ggml_cl_pool_malloc(sizeof(float) * y_ne, &y_size);
- cl_mem d_D = dst->backend == GGML_BACKEND_GPU ? (cl_mem) dst->extra : ggml_cl_pool_malloc(sizeof(float) * d_ne, &d_size);
+ cl_mem d_Y = src1->backend == GGML_BACKEND_TYPE_GPU ? (cl_mem) src1->extra : ggml_cl_pool_malloc(sizeof(float) * y_ne, &y_size);
+ cl_mem d_D = dst->backend == GGML_BACKEND_TYPE_GPU ? (cl_mem) dst->extra : ggml_cl_pool_malloc(sizeof(float) * d_ne, &d_size);
size_t x_offset = 0;
// TODO: copy src0 here when r3>1
for (int64_t i13 = i03 * r3, e13 = i13 + r3; i13 < e13; i13++) {
for (int64_t i02 = 0; i02 < ne02; i02++) {
- if (src0->backend == GGML_BACKEND_GPU) {
+ if (src0->backend == GGML_BACKEND_TYPE_GPU) {
x_offset = (i03 * ne02 + i02) * x_ne;
} else {
// copy src0 to device
for (int64_t i12 = i02 * r2, e12 = i12 + r2; i12 < e12; i12++) {
// copy src1 to device
- if (src1->backend == GGML_BACKEND_CPU) {
+ if (src1->backend == GGML_BACKEND_TYPE_CPU) {
CL_CHECK(ggml_cl_h2d_tensor_2d(queue, d_Y, 0, src1, i13, i12, NULL));
}
}
// copy dst to host
- if (dst->backend == GGML_BACKEND_CPU) {
+ if (dst->backend == GGML_BACKEND_TYPE_CPU) {
float * d = (float *) ((char *) dst->data + i12*nb2 + i13*nb3);
CL_CHECK(clEnqueueReadBuffer(queue, d_D, true, 0, sizeof(float) * d_ne, d, 1, &ev_sgemm, NULL));
}
}
}
- if (src0->backend != GGML_BACKEND_GPU) {
+ if (src0->backend != GGML_BACKEND_TYPE_GPU) {
ggml_cl_pool_free(d_X, x_size);
}
- if (src1->backend != GGML_BACKEND_GPU) {
+ if (src1->backend != GGML_BACKEND_TYPE_GPU) {
ggml_cl_pool_free(d_Y, y_size);
}
- if (dst->backend != GGML_BACKEND_GPU) {
+ if (dst->backend != GGML_BACKEND_TYPE_GPU) {
ggml_cl_pool_free(d_D, d_size);
}
}
size_t y_size;
size_t d_size;
cl_mem d_X;
- if (src0->backend == GGML_BACKEND_GPU) { // NOLINT
+ if (src0->backend == GGML_BACKEND_TYPE_GPU) { // NOLINT
d_X = (cl_mem) src0->extra;
} else {
d_X = ggml_cl_pool_malloc(sizeof(ggml_fp16_t) * x_ne, &x_size);
// TODO: copy src0 here when r3>1
for (int64_t i13 = i03 * r3, e13 = i13 + r3; i13 < e13; i13++) {
for (int64_t i02 = 0; i02 < ne02; i02++) {
- if (src0->backend == GGML_BACKEND_GPU) {
+ if (src0->backend == GGML_BACKEND_TYPE_GPU) {
x_offset = (i03 * ne02 + i02) * x_ne;
} else {
// copy src0 to device
}
// copy dst to host, then convert to float
- if (dst->backend == GGML_BACKEND_CPU) {
+ if (dst->backend == GGML_BACKEND_TYPE_CPU) {
CL_CHECK(clEnqueueReadBuffer(queue, d_D, true, 0, sizeof(ggml_fp16_t) * d_ne, tmp, 1, &ev_sgemm, NULL));
float * d = (float *) ((char *) dst->data + i12*nb2 + i13*nb3);
ggml_fp16_to_fp32_row(tmp, d, d_ne);
}
}
- if (src0->backend != GGML_BACKEND_GPU) {
+ if (src0->backend != GGML_BACKEND_TYPE_GPU) {
ggml_cl_pool_free(d_X, x_size);
}
ggml_cl_pool_free(d_Y, y_size);
cl_mem d_Y = ggml_cl_pool_malloc(sizeof(float) * y_ne, &y_size);
cl_mem d_D = ggml_cl_pool_malloc(sizeof(float) * d_ne, &d_size);
cl_mem d_Q;
- if (src0->backend == GGML_BACKEND_CPU) {
+ if (src0->backend == GGML_BACKEND_TYPE_CPU) {
d_Q = ggml_cl_pool_malloc(q_sz, &q_size);
}
for (int64_t i13 = i03 * r3, e13 = i13 + r3; i13 < e13; i13++) {
for (int64_t i02 = 0; i02 < ne02; i02++) {
// copy src0 to device if necessary
- if (src0->backend == GGML_BACKEND_CPU) {
+ if (src0->backend == GGML_BACKEND_TYPE_CPU) {
events.emplace_back();
CL_CHECK(ggml_cl_h2d_tensor_2d(queue, d_Q, 0, src0, i03, i02, events.data() + ev_idx++));
- } else if (src0->backend == GGML_BACKEND_GPU) {
+ } else if (src0->backend == GGML_BACKEND_TYPE_GPU) {
d_Q = (cl_mem) src0->extra;
} else {
GGML_ASSERT(false);
if (!mul_mat_vec) {
// convert src0 to fp32 on device
const size_t global = x_ne / global_denom;
- const size_t offset = src0->backend == GGML_BACKEND_GPU ? (i03 * ne02 + i02) * x_bps : 0;
+ const size_t offset = src0->backend == GGML_BACKEND_TYPE_GPU ? (i03 * ne02 + i02) * x_bps : 0;
CL_CHECK(clSetKernelArg(*to_fp32_cl, 0, sizeof(cl_mem), &d_Q));
CL_CHECK(clSetKernelArg(*to_fp32_cl, 1, sizeof(cl_mem), &d_X));
CL_CHECK(clEnqueueNDRangeKernel(queue, *to_fp32_cl, 1, &offset, &global, local > 0 ? &local : NULL, events.size(), !events.empty() ? events.data() : NULL, NULL));
// compute
const size_t global = ne01 * local;
- const size_t offset = src0->backend == GGML_BACKEND_GPU ? (i03 * ne02 + i02) * x_bps : 0;
+ const size_t offset = src0->backend == GGML_BACKEND_TYPE_GPU ? (i03 * ne02 + i02) * x_bps : 0;
const cl_int ncols = ne00;
events.emplace_back();
CL_CHECK(clSetKernelArg(*dmmv, 0, sizeof(cl_mem), &d_Q));
}
ggml_cl_pool_free(d_Y, y_size);
ggml_cl_pool_free(d_D, d_size);
- if (src0->backend == GGML_BACKEND_CPU) {
+ if (src0->backend == GGML_BACKEND_TYPE_CPU) {
ggml_cl_pool_free(d_Q, q_size);
}
}
if ((src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16 || ggml_is_quantized(src0->type)) &&
src1->type == GGML_TYPE_F32 &&
dst->type == GGML_TYPE_F32 &&
- ((ne0 >= 32 && ne1 >= 32 && ne10 >= 32) || src0->backend == GGML_BACKEND_GPU)) {
+ ((ne0 >= 32 && ne1 >= 32 && ne10 >= 32) || src0->backend == GGML_BACKEND_TYPE_GPU)) {
return true;
}
CL_CHECK(clFinish(queue));
tensor->extra = dst;
- GGML_ASSERT(tensor->backend == GGML_BACKEND_GPU);
+ GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_GPU);
}
// ggml-backend
ctx->sub_buffers.push_back(sub_buffer);
tensor->extra = sub_buffer;
}
- tensor->backend = GGML_BACKEND_GPU;
+ tensor->backend = GGML_BACKEND_TYPE_GPU;
}
static void ggml_backend_opencl_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
size_t total_elements = ggml_nelements(src);
- const bool src_on_device = src->backend == GGML_BACKEND_GPU || src->backend == GGML_BACKEND_GPU_SPLIT;
+ const bool src_on_device = src->backend == GGML_BACKEND_TYPE_GPU || src->backend == GGML_BACKEND_TYPE_GPU_SPLIT;
float * src_data = NULL;
if(src_on_device) {
ggml_tensor_extra_gpu * src_extra = (ggml_tensor_extra_gpu *) src->extra;
int ixj = col ^ j;
if (ixj > col) {
if ((col & k) == 0) {
- if (order == GGML_SORT_ASC ? x_row[dst_row[col]] > x_row[dst_row[ixj]] : x_row[dst_row[col]] < x_row[dst_row[ixj]]) {
+ if (order == GGML_SORT_ORDER_ASC ? x_row[dst_row[col]] > x_row[dst_row[ixj]] : x_row[dst_row[col]] < x_row[dst_row[ixj]]) {
swap(dst_row[col], dst_row[ixj]);
}
} else {
- if (order == GGML_SORT_ASC ? x_row[dst_row[col]] < x_row[dst_row[ixj]] : x_row[dst_row[col]] > x_row[dst_row[ixj]]) {
+ if (order == GGML_SORT_ORDER_ASC ? x_row[dst_row[col]] < x_row[dst_row[ixj]] : x_row[dst_row[col]] > x_row[dst_row[ixj]]) {
swap(dst_row[col], dst_row[ixj]);
}
}
const sycl::range<3> block_dims(1, 1, ncols);
const sycl::range<3> block_nums(1, nrows, 1);
- if (order == GGML_SORT_ASC) {
+ if (order == GGML_SORT_ORDER_ASC) {
/*
DPCT1049:44: The work-group size passed to the SYCL kernel may exceed
the limit. To get the device limit, query
stream->parallel_for(
sycl::nd_range<3>(block_nums * block_dims, block_dims),
[=](sycl::nd_item<3> item_ct1) {
- k_argsort_f32_i32<GGML_SORT_ASC>(x, dst, ncols, item_ct1);
+ k_argsort_f32_i32<GGML_SORT_ORDER_ASC>(x, dst, ncols, item_ct1);
});
- } else if (order == GGML_SORT_DESC) {
+ } else if (order == GGML_SORT_ORDER_DESC) {
/*
DPCT1049:45: The work-group size passed to the SYCL kernel may exceed
the limit. To get the device limit, query
stream->parallel_for(
sycl::nd_range<3>(block_nums * block_dims, block_dims),
[=](sycl::nd_item<3> item_ct1) {
- k_argsort_f32_i32<GGML_SORT_DESC>(x, dst, ncols, item_ct1);
+ k_argsort_f32_i32<GGML_SORT_ORDER_DESC>(x, dst, ncols, item_ct1);
});
} else {
GGML_ASSERT(false);
dpct::memcpy_direction kind;
char * src_ptr;
- if (src->backend == GGML_BACKEND_CPU) {
+ if (src->backend == GGML_BACKEND_TYPE_CPU) {
kind = dpct::host_to_device;
src_ptr = (char *) src->data;
- // GGML_SYCL_DEBUG("ggml_sycl_cpy_tensor_2d GGML_BACKEND_CPU src_ptr %p\n", src_ptr);
- } else if (src->backend == GGML_BACKEND_GPU || src->backend == GGML_BACKEND_GPU_SPLIT) {
- GGML_ASSERT(src->backend != GGML_BACKEND_GPU_SPLIT || (i1_low == 0 && i1_high == src->ne[1]));
+ // GGML_SYCL_DEBUG("ggml_sycl_cpy_tensor_2d GGML_BACKEND_TYPE_CPU src_ptr %p\n", src_ptr);
+ } else if (src->backend == GGML_BACKEND_TYPE_GPU || src->backend == GGML_BACKEND_TYPE_GPU_SPLIT) {
+ GGML_ASSERT(src->backend != GGML_BACKEND_TYPE_GPU_SPLIT || (i1_low == 0 && i1_high == src->ne[1]));
kind = dpct::device_to_device;
ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) src->extra;
int id;
// the main device has a larger memory buffer to hold the results from all GPUs
// nrows_dst == nrows of the matrix that the dequantize_mul_mat kernel writes into
- const int64_t nrows_dst = dst->backend == GGML_BACKEND_GPU && device_id == g_main_device ? ne0 : row_diff;
+ const int64_t nrows_dst = dst->backend == GGML_BACKEND_TYPE_GPU && device_id == g_main_device ? ne0 : row_diff;
switch (src0->type) {
case GGML_TYPE_Q4_0:
// the main device has a larger memory buffer to hold the results from all GPUs
// ldc == nrows of the matrix that cuBLAS writes into
- int ldc = dst->backend == GGML_BACKEND_GPU && device_id == g_main_device ? ne0 : row_diff;
+ int ldc = dst->backend == GGML_BACKEND_TYPE_GPU && device_id == g_main_device ? ne0 : row_diff;
#ifdef GGML_SYCL_F16
bool use_fp16 = true; // TODO(Yu) SYCL capability check
const bool use_src1 = src1 != nullptr;
const int64_t nrows1 = use_src1 ? ggml_nrows(src1) : 1;
- GGML_ASSERT(!use_src1 || src1->backend != GGML_BACKEND_GPU_SPLIT);
- GGML_ASSERT( dst->backend != GGML_BACKEND_GPU_SPLIT);
+ GGML_ASSERT(!use_src1 || src1->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
+ GGML_ASSERT( dst->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
ggml_tensor_extra_gpu * src0_extra = (ggml_tensor_extra_gpu *) src0->extra;
ggml_tensor_extra_gpu * src1_extra = use_src1 ? (ggml_tensor_extra_gpu *) src1->extra : nullptr;
ggml_tensor_extra_gpu * dst_extra = (ggml_tensor_extra_gpu *) dst->extra;
- const bool src0_on_device = src0->backend == GGML_BACKEND_GPU || src0->backend == GGML_BACKEND_GPU_SPLIT;
- const bool src1_on_device = use_src1 && src1->backend == GGML_BACKEND_GPU;
- const bool dst_on_device = dst->backend == GGML_BACKEND_GPU;
+ const bool src0_on_device = src0->backend == GGML_BACKEND_TYPE_GPU || src0->backend == GGML_BACKEND_TYPE_GPU_SPLIT;
+ const bool src1_on_device = use_src1 && src1->backend == GGML_BACKEND_TYPE_GPU;
+ const bool dst_on_device = dst->backend == GGML_BACKEND_TYPE_GPU;
// dd = data device
float * src0_ddf = nullptr;
main_stream->memcpy(dst->data, dst_ddf, ggml_nbytes(dst))));
}
- if (dst->backend == GGML_BACKEND_CPU) {
+ if (dst->backend == GGML_BACKEND_TYPE_CPU) {
SYCL_CHECK(CHECK_TRY_ERROR(
dpct::get_current_device().queues_wait_and_throw()));
}
const int nb2 = dst->nb[2];
const int nb3 = dst->nb[3];
- GGML_ASSERT(dst->backend != GGML_BACKEND_GPU_SPLIT);
- GGML_ASSERT(src1->backend != GGML_BACKEND_GPU_SPLIT);
+ GGML_ASSERT(dst->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
+ GGML_ASSERT(src1->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
GGML_ASSERT(ne12 >= ne02 && ne12 % ne02 == 0);
ggml_tensor_extra_gpu * src1_extra = (ggml_tensor_extra_gpu *) src1->extra;
ggml_tensor_extra_gpu * dst_extra = (ggml_tensor_extra_gpu *) dst->extra;
- const bool src0_on_device = src0->backend == GGML_BACKEND_GPU || src0->backend == GGML_BACKEND_GPU_SPLIT;
+ const bool src0_on_device = src0->backend == GGML_BACKEND_TYPE_GPU || src0->backend == GGML_BACKEND_TYPE_GPU_SPLIT;
const bool src0_is_contiguous = ggml_is_contiguous(src0);
const bool src1_is_contiguous = ggml_is_contiguous(src1);
int64_t src1_padded_col_size = GGML_PAD(ne10, MATRIX_ROW_PADDING);
- const bool split = src0->backend == GGML_BACKEND_GPU_SPLIT;
+ const bool split = src0->backend == GGML_BACKEND_TYPE_GPU_SPLIT;
GGML_ASSERT(!(split && ne02 > 1));
GGML_ASSERT(!(split && ne03 > 1));
GGML_ASSERT(!(split && ne02 < ne12));
used_devices++;
- const bool src1_on_device = src1->backend == GGML_BACKEND_GPU && id == g_main_device_index;
- const bool dst_on_device = dst->backend == GGML_BACKEND_GPU && id == g_main_device_index;
+ const bool src1_on_device = src1->backend == GGML_BACKEND_TYPE_GPU && id == g_main_device_index;
+ const bool dst_on_device = dst->backend == GGML_BACKEND_TYPE_GPU && id == g_main_device_index;
ggml_sycl_set_device(get_device_id_by_index(id));
const dpct::queue_ptr stream = g_syclStreams[id][0];
continue;
}
- const bool src1_on_device = src1->backend == GGML_BACKEND_GPU && id == g_main_device_index;
- const bool dst_on_device = dst->backend == GGML_BACKEND_GPU && id == g_main_device_index;
+ const bool src1_on_device = src1->backend == GGML_BACKEND_TYPE_GPU && id == g_main_device_index;
+ const bool dst_on_device = dst->backend == GGML_BACKEND_TYPE_GPU && id == g_main_device_index;
const int64_t row_diff = row_high[id] - row_low[id];
ggml_sycl_set_device(get_device_id_by_index(id));
// the main device memory buffer can be on VRAM scratch, with space for all partial results
// in that case an offset on dst_ddf_i is needed
- if (dst->backend == GGML_BACKEND_GPU && id == g_main_device_index) {
+ if (dst->backend == GGML_BACKEND_TYPE_GPU && id == g_main_device_index) {
dst_dd_i += row_low[id]; // offset is 0 if no tensor split
}
// copy src0, src1 to device if necessary
- if (src1->backend == GGML_BACKEND_GPU && src1_is_contiguous) {
+ if (src1->backend == GGML_BACKEND_TYPE_GPU && src1_is_contiguous) {
if (id != g_main_device_index) {
if (convert_src1_to_q8_1) {
char * src1_ddq_i_source = src1_ddq[g_main_device_index] + src1_ddq_i_offset;
src1_ncols * ne10 * sizeof(float))));
}
}
- } else if (src1->backend == GGML_BACKEND_CPU || (src1_on_device && !src1_is_contiguous)) {
+ } else if (src1->backend == GGML_BACKEND_TYPE_CPU || (src1_on_device && !src1_is_contiguous)) {
SYCL_CHECK(ggml_sycl_cpy_tensor_2d(
src1_ddf_i, src1, i03, i02, src1_col_0, src1_col_0+src1_ncols, stream));
} else {
GGML_ASSERT(false);
}
- if (convert_src1_to_q8_1 && (src1->backend == GGML_BACKEND_CPU || !src1_is_contiguous)) {
+ if (convert_src1_to_q8_1 && (src1->backend == GGML_BACKEND_TYPE_CPU || !src1_is_contiguous)) {
quantize_row_q8_1_sycl(src1_ddf_i, src1_ddq_i, ne10, src1_ncols, src1_padded_col_size, stream);
/*
DPCT1010:92: SYCL uses exceptions to report errors and does
if (!dst_on_device) {
void * dst_off_device;
dpct::memcpy_direction kind;
- if (dst->backend == GGML_BACKEND_CPU) {
+ if (dst->backend == GGML_BACKEND_TYPE_CPU) {
dst_off_device = dst->data;
kind = dpct::device_to_host;
- } else if (dst->backend == GGML_BACKEND_GPU) {
+ } else if (dst->backend == GGML_BACKEND_TYPE_GPU) {
dst_off_device = dst_extra->data_device[g_main_device_index];
kind = dpct::device_to_device;
} else {
}
}
- if (dst->backend == GGML_BACKEND_CPU) {
+ if (dst->backend == GGML_BACKEND_TYPE_CPU) {
SYCL_CHECK(ggml_sycl_set_device(g_main_device));
SYCL_CHECK(CHECK_TRY_ERROR(
dpct::get_current_device().queues_wait_and_throw()));
const ggml_tensor *src1,
ggml_tensor *dst) try {
GGML_ASSERT(ggml_is_permuted(src0) && ggml_is_permuted(src1));
- GGML_ASSERT(src0->backend != GGML_BACKEND_GPU_SPLIT);
+ GGML_ASSERT(src0->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
GGML_ASSERT(src0->nb[0] <= src0->nb[1] && src0->nb[2] <= src0->nb[3]); // 0213 permutation
GGML_ASSERT(src1->nb[0] <= src1->nb[1] && src1->nb[2] <= src1->nb[3]); // 0213 permutation
GGML_ASSERT(src0->type == GGML_TYPE_F16);
GGML_ASSERT(!ggml_is_transposed(src0));
GGML_ASSERT(!ggml_is_transposed(src1));
GGML_ASSERT(!ggml_is_permuted(src0));
- GGML_ASSERT(src0->backend != GGML_BACKEND_GPU_SPLIT);
+ GGML_ASSERT(src0->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
GGML_ASSERT(src0->type == GGML_TYPE_F16);
GGML_ASSERT(src1->type == GGML_TYPE_F32);
GGML_ASSERT(!ggml_is_transposed(src0));
GGML_ASSERT(!ggml_is_transposed(src1));
- GGML_ASSERT(src0->backend != GGML_BACKEND_GPU_SPLIT);
+ GGML_ASSERT(src0->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
GGML_ASSERT(src0->type == GGML_TYPE_F16);
GGML_ASSERT(src1->type == GGML_TYPE_F32);
static void ggml_sycl_mul_mat(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
const bool all_on_device =
- (src0->backend == GGML_BACKEND_GPU || src0->backend == GGML_BACKEND_GPU_SPLIT) &&
- (src1->backend == GGML_BACKEND_GPU) &&
- ( dst->backend == GGML_BACKEND_GPU);
+ (src0->backend == GGML_BACKEND_TYPE_GPU || src0->backend == GGML_BACKEND_TYPE_GPU_SPLIT) &&
+ (src1->backend == GGML_BACKEND_TYPE_GPU) &&
+ ( dst->backend == GGML_BACKEND_TYPE_GPU);
- const bool split = src0->backend == GGML_BACKEND_GPU_SPLIT;
+ const bool split = src0->backend == GGML_BACKEND_TYPE_GPU_SPLIT;
int64_t min_compute_capability = INT_MAX;
for (int64_t id = 0; id < g_device_count; ++id) {
GGML_ASSERT(!ggml_is_transposed(src00));
GGML_ASSERT(!ggml_is_transposed(src1));
- GGML_ASSERT(src00->backend != GGML_BACKEND_GPU_SPLIT);
+ GGML_ASSERT(src00->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
GGML_ASSERT(src1->type == GGML_TYPE_F32);
GGML_TENSOR_LOCALS(int64_t, ne0, src00, ne);
const dpct::queue_ptr stream = g_syclStreams[g_main_device_index][0];
- if (ids->backend == GGML_BACKEND_GPU) {
+ if (ids->backend == GGML_BACKEND_TYPE_GPU) {
const char * ids_dev = (const char *)((const ggml_tensor_extra_gpu *)ids->extra)->data_device[g_main_device_index];
SYCL_CHECK(CHECK_TRY_ERROR(
stream->memcpy(ids_host.data(), ids_dev, ggml_nbytes(ids))));
ggml_tensor src1_row = *src1;
ggml_tensor dst_row = *dst;
- src1_row.backend = GGML_BACKEND_GPU;
- dst_row.backend = GGML_BACKEND_GPU;
+ src1_row.backend = GGML_BACKEND_TYPE_GPU;
+ dst_row.backend = GGML_BACKEND_TYPE_GPU;
src1_row.extra = &src1_row_extra;
dst_row.extra = &dst_row_extra;
- char * src1_original = src1->backend == GGML_BACKEND_CPU ?
+ char * src1_original = src1->backend == GGML_BACKEND_TYPE_CPU ?
(char *) src1->data : (char *) src1_extra->data_device[g_main_device_index];
- char * dst_original = dst->backend == GGML_BACKEND_CPU ?
+ char * dst_original = dst->backend == GGML_BACKEND_TYPE_CPU ?
(char *) dst->data : (char *) dst_extra->data_device[g_main_device_index];
if (src1->ne[1] == 1) {
- GGML_ASSERT(src1->backend == GGML_BACKEND_GPU);
- GGML_ASSERT(dst->backend == GGML_BACKEND_GPU);
+ GGML_ASSERT(src1->backend == GGML_BACKEND_TYPE_GPU);
+ GGML_ASSERT(dst->backend == GGML_BACKEND_TYPE_GPU);
for (int64_t i01 = 0; i01 < ids->ne[1]; i01++) {
//int32_t row_id;
}
}
- if (dst->backend == GGML_BACKEND_CPU) {
+ if (dst->backend == GGML_BACKEND_TYPE_CPU) {
SYCL_CHECK(CHECK_TRY_ERROR(stream->wait()));
}
}
const int64_t ne = ggml_nelements(src0);
GGML_ASSERT(ne == ggml_nelements(src1));
- GGML_ASSERT(src0->backend == GGML_BACKEND_GPU);
- GGML_ASSERT(src1->backend == GGML_BACKEND_GPU);
+ GGML_ASSERT(src0->backend == GGML_BACKEND_TYPE_GPU);
+ GGML_ASSERT(src1->backend == GGML_BACKEND_TYPE_GPU);
GGML_ASSERT(ggml_nbytes(src0) <= INT_MAX);
GGML_ASSERT(ggml_nbytes(src1) <= INT_MAX);
memset(extra, 0, sizeof(*extra));
for (int64_t id = 0; id < g_device_count; ++id) {
- if (backend == GGML_BACKEND_GPU && id != g_main_device_index) {
+ if (backend == GGML_BACKEND_TYPE_GPU && id != g_main_device_index) {
continue;
}
ggml_sycl_set_device(get_device_id_by_index(id));
const dpct::queue_ptr stream = g_syclStreams[id][0];
int64_t row_low, row_high;
- if (backend == GGML_BACKEND_GPU) {
+ if (backend == GGML_BACKEND_TYPE_GPU) {
row_low = 0;
row_high = nrows;
- } else if (backend == GGML_BACKEND_GPU_SPLIT) {
+ } else if (backend == GGML_BACKEND_TYPE_GPU_SPLIT) {
const int64_t rounding = get_row_rounding(tensor->type);
row_low = id == 0 ? 0 : nrows*g_tensor_split[id];
extra->data_device[id] = buf;
- if (backend == GGML_BACKEND_GPU_SPLIT) {
+ if (backend == GGML_BACKEND_TYPE_GPU_SPLIT) {
for (int64_t is = 0; is < MAX_STREAMS; ++is) {
SYCL_CHECK(CHECK_TRY_ERROR(extra->events[id][is] =
new sycl::event()));
}
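// For GGML_BACKEND_TYPE_GPU_SPLIT the per-device row range is derived from cumulative
// split fractions and aligned down to a type-dependent rounding (get_row_rounding above).
// A self-contained sketch of that arithmetic (`split_rows` and the 60/40 split are
// hypothetical; the real code inlines this per tensor):
#include <cstdint>
#include <cstdio>

static void split_rows(int64_t nrows, int dev, int n_dev, const float * tensor_split,
                       int64_t rounding, int64_t * row_low, int64_t * row_high) {
    *row_low  = dev == 0 ? 0 : (int64_t)(nrows * tensor_split[dev]);
    *row_low -= *row_low % rounding;
    if (dev == n_dev - 1) {
        *row_high = nrows;                                  // last device takes the remainder
    } else {
        *row_high  = (int64_t)(nrows * tensor_split[dev + 1]);
        *row_high -= *row_high % rounding;
    }
}

int main() {
    const float tensor_split[2] = { 0.0f, 0.6f }; // cumulative start offsets: 60% / 40%
    for (int dev = 0; dev < 2; ++dev) {
        int64_t lo, hi;
        split_rows(4096, dev, 2, tensor_split, /*rounding=*/32, &lo, &hi);
        printf("device %d: rows [%lld, %lld)\n", dev, (long long) lo, (long long) hi);
    }
    return 0;
}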
void ggml_sycl_free_data(struct ggml_tensor *tensor) try {
- if (!tensor || !tensor->extra || (tensor->backend != GGML_BACKEND_GPU && tensor->backend != GGML_BACKEND_GPU_SPLIT) ) {
+ if (!tensor || !tensor->extra || (tensor->backend != GGML_BACKEND_TYPE_GPU && tensor->backend != GGML_BACKEND_TYPE_GPU_SPLIT) ) {
return;
}
return;
}
- tensor->backend = GGML_BACKEND_GPU;
+ tensor->backend = GGML_BACKEND_TYPE_GPU;
- if (tensor->src[0] != nullptr && tensor->src[0]->backend == GGML_BACKEND_CPU) {
+ if (tensor->src[0] != nullptr && tensor->src[0]->backend == GGML_BACKEND_TYPE_CPU) {
const ggml_op src0_op = tensor->src[0]->op;
if (src0_op == GGML_OP_RESHAPE || src0_op == GGML_OP_TRANSPOSE || src0_op == GGML_OP_VIEW || src0_op == GGML_OP_PERMUTE) {
ggml_sycl_assign_buffers_impl(tensor->src[0], scratch, force_inplace, no_alloc);
}
}
- if (tensor->op == GGML_OP_CPY && tensor->src[1]->backend == GGML_BACKEND_CPU) {
+ if (tensor->op == GGML_OP_CPY && tensor->src[1]->backend == GGML_BACKEND_TYPE_CPU) {
ggml_sycl_assign_buffers_impl(tensor->src[1], scratch, force_inplace, no_alloc);
}
SYCL_CHECK(ggml_sycl_set_device(g_main_device));
const dpct::queue_ptr stream = g_syclStreams[g_main_device_index][0];
- if (inplace && (tensor->src[0]->backend == GGML_BACKEND_GPU || tensor->src[0]->backend == GGML_BACKEND_GPU_SPLIT)) {
+ if (inplace && (tensor->src[0]->backend == GGML_BACKEND_TYPE_GPU || tensor->src[0]->backend == GGML_BACKEND_TYPE_GPU_SPLIT)) {
ggml_tensor_extra_gpu * src0_extra = (ggml_tensor_extra_gpu * ) tensor->src[0]->extra;
char * src0_ddc = (char *) src0_extra->data_device[g_main_device_index];
size_t offset = 0;
const bool inplace = tensor->view_src != nullptr;
- if (inplace && (tensor->view_src->backend == GGML_BACKEND_GPU || tensor->view_src->backend == GGML_BACKEND_GPU_SPLIT)) {
+ if (inplace && (tensor->view_src->backend == GGML_BACKEND_TYPE_GPU || tensor->view_src->backend == GGML_BACKEND_TYPE_GPU_SPLIT)) {
ggml_tensor_extra_gpu * src0_extra = (ggml_tensor_extra_gpu * ) tensor->view_src->extra;
char * src0_ddc = (char *) src0_extra->data_device[g_main_device_index];
size_t view_offset = 0;
}
void ggml_sycl_copy_to_device(struct ggml_tensor *tensor) try {
- GGML_ASSERT(tensor->backend == GGML_BACKEND_GPU);
+ GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_GPU);
GGML_ASSERT(ggml_is_contiguous(tensor));
ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) tensor->extra;
if (!g_sycl_loaded) return false;
ggml_sycl_func_t func;
- const bool any_on_device = tensor->backend == GGML_BACKEND_GPU
- || (tensor->src[0] != nullptr && (tensor->src[0]->backend == GGML_BACKEND_GPU || tensor->src[0]->backend == GGML_BACKEND_GPU_SPLIT))
- || (tensor->src[1] != nullptr && tensor->src[1]->backend == GGML_BACKEND_GPU);
+ const bool any_on_device = tensor->backend == GGML_BACKEND_TYPE_GPU
+ || (tensor->src[0] != nullptr && (tensor->src[0]->backend == GGML_BACKEND_TYPE_GPU || tensor->src[0]->backend == GGML_BACKEND_TYPE_GPU_SPLIT))
+ || (tensor->src[1] != nullptr && tensor->src[1]->backend == GGML_BACKEND_TYPE_GPU);
if (!any_on_device && tensor->op != GGML_OP_MUL_MAT && tensor->op != GGML_OP_MUL_MAT_ID) {
return false;
return false;
}
- if (tensor->src[0] != nullptr && tensor->src[0]->backend == GGML_BACKEND_GPU_SPLIT) {
+ if (tensor->src[0] != nullptr && tensor->src[0]->backend == GGML_BACKEND_TYPE_GPU_SPLIT) {
ggml_sycl_set_peer_access(tensor->src[1]->ne[1]);
}
if (params->ith != 0) {
return true;
}
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return true;
}
func(tensor->src[0], tensor->src[1], tensor);
extra->data_device[ctx->device] = tensor->data;
- tensor->backend = GGML_BACKEND_GPU;
+ tensor->backend = GGML_BACKEND_TYPE_GPU;
tensor->extra = extra;
if (ggml_is_quantized(tensor->type)) {
ggml_tensor *tensor,
const void *data, size_t offset,
size_t size) try {
- GGML_ASSERT(tensor->backend == GGML_BACKEND_GPU);
+ GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_GPU);
ggml_backend_sycl_buffer_context * ctx = ( ggml_backend_sycl_buffer_context *)buffer->context;
const ggml_tensor *tensor,
void *data, size_t offset,
size_t size) try {
- GGML_ASSERT(tensor->backend == GGML_BACKEND_GPU);
+ GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_GPU);
ggml_backend_sycl_buffer_context * ctx = ( ggml_backend_sycl_buffer_context *)buffer->context;
ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
GGML_ASSERT(tensor->buffer->buft == ggml_backend_sycl_buffer_type(sycl_ctx->device) && "unsupported buffer type");
- GGML_ASSERT(tensor->backend == GGML_BACKEND_GPU);
+ GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_GPU);
SYCL_CHECK(CHECK_TRY_ERROR(g_syclStreams[sycl_ctx->device][0]->memcpy(
(char *)tensor->data + offset, data, size)));
ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
GGML_ASSERT(tensor->buffer->buft == ggml_backend_sycl_buffer_type(sycl_ctx->device) && "unsupported buffer type");
- GGML_ASSERT(tensor->backend == GGML_BACKEND_GPU);
+ GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_GPU);
SYCL_CHECK(CHECK_TRY_ERROR(g_syclStreams[sycl_ctx->device][0]->memcpy(
data, (const char *)tensor->data + offset, size)));
ggml_sycl_set_main_device(sycl_ctx->device);
ggml_compute_params params = {};
- params.type = GGML_TASK_COMPUTE;
+ params.type = GGML_TASK_TYPE_COMPUTE;
params.ith = 0;
for (int i = 0; i < cgraph->n_nodes; i++) {
ggml_tensor * node = cgraph->nodes[i];
if (node->op == GGML_OP_RESHAPE || node->op == GGML_OP_TRANSPOSE || node->op == GGML_OP_VIEW || node->op == GGML_OP_PERMUTE)
continue;
- assert(node->backend == GGML_BACKEND_GPU);
+ assert(node->backend == GGML_BACKEND_TYPE_GPU);
assert(node->buffer->buft == ggml_backend_sycl_buffer_type(sycl_ctx->device));
assert(node->extra != nullptr);
for (int j = 0; j < GGML_MAX_SRC; j++) {
if (node->src[j] != nullptr) {
- assert(node->src[j]->backend == GGML_BACKEND_GPU);
+ assert(node->src[j]->backend == GGML_BACKEND_TYPE_GPU);
assert(node->src[j]->buffer->buft == ggml_backend_sycl_buffer_type(sycl_ctx->device));
assert(node->src[j]->extra != nullptr);
}
src1_uma = d_Qy != nullptr;
}
- const bool load_x = src0->backend != GGML_BACKEND_GPU && !src0_uma;
- const bool load_y = src1->backend != GGML_BACKEND_GPU && !src1_uma;
+ const bool load_x = src0->backend != GGML_BACKEND_TYPE_GPU && !src0_uma;
+ const bool load_y = src1->backend != GGML_BACKEND_TYPE_GPU && !src1_uma;
const bool x_non_contig = !load_x && !ggml_vk_dim01_contiguous(src0);
const bool y_non_contig = !load_y && !ggml_vk_dim01_contiguous(src1);
// compute
ggml_vk_matmul(ctx, subctx, *pipeline, { d_X, x_buf_offset, x_sz * ne02 * ne03 }, { d_Y, y_buf_offset, y_sz * ne12 * ne13 }, { d_D, d_buf_offset, d_sz * ne12 * ne13 }, { ctx->prealloc_split_k, 0, d_sz * ne12 * ne13 * split_k }, ne01, ne11, ne10, ne10, ne10, ne01, split_k, ne12*ne13, ne02, ne12, r2, r3, stride_batch_x, stride_batch_y, ne20*ne21); // NOLINT
- if (dst->backend == GGML_BACKEND_CPU) {
+ if (dst->backend == GGML_BACKEND_TYPE_CPU) {
// copy dst to host
float * d = (float *) ((char *) dst->data);
ggml_vk_buffer_read_async(ctx, subctx, d_D, 0, d, sizeof(float) * d_ne * ne12 * ne13);
src1_uma = d_Qy != nullptr;
}
- const bool load_x = src0->backend != GGML_BACKEND_GPU && !src0_uma;
- const bool load_y = src1->backend != GGML_BACKEND_GPU && !src1_uma;
+ const bool load_x = src0->backend != GGML_BACKEND_TYPE_GPU && !src0_uma;
+ const bool load_y = src1->backend != GGML_BACKEND_TYPE_GPU && !src1_uma;
const bool x_non_contig = !load_x && !ggml_vk_dim01_contiguous(src0);
const bool y_non_contig = !load_y && !ggml_vk_dim01_contiguous(src1);
ggml_vk_sync_buffers(subctx);
ggml_vk_dispatch_pipeline(ctx, subctx, *dmmv, { { d_X, x_offset, x_sz }, { d_Y, y_buffer_offset, y_sz + y_shader_offset }, { d_D, d_buffer_offset, d_sz + d_shader_offset } }, 3 * sizeof(int), &pc, { (uint32_t)ne01, 1, 1});
- if (dst->backend == GGML_BACKEND_CPU) {
+ if (dst->backend == GGML_BACKEND_TYPE_CPU) {
// copy dst to host
float * d = (float *) ((char *) dst->data + i12*nb2 + i13*nb3);
ggml_vk_sync_buffers(subctx);
std::cerr << "), (" << dst << ", name=" << dst->name << ", type=" << dst->type << ", backend=" << dst->backend << ", ne0=" << dst->ne[0] << ", ne1=" << dst->ne[1] << ", ne2=" << dst->ne[2] << ", ne3=" << dst->ne[3] << ", nb0=" << dst->nb[0] << ", nb1=" << dst->nb[1] << ", nb2=" << dst->nb[2] << ", nb3=" << dst->nb[3] << "),)" << std::endl;
#endif
GGML_ASSERT(ggml_is_permuted(src0) && ggml_is_permuted(src1));
- GGML_ASSERT(src0->backend == GGML_BACKEND_GPU);
+ GGML_ASSERT(src0->backend == GGML_BACKEND_TYPE_GPU);
GGML_ASSERT(src0->nb[0] <= src0->nb[1] && src0->nb[2] <= src0->nb[3]); // NOLINT
GGML_ASSERT(src1->nb[0] <= src1->nb[1] && src1->nb[2] <= src1->nb[3]); // NOLINT
GGML_ASSERT(src0->type == GGML_TYPE_F16);
src1_uma = d_Qy != nullptr;
}
- const bool load_y = src1->backend != GGML_BACKEND_GPU && !src1_uma;
+ const bool load_y = src1->backend != GGML_BACKEND_TYPE_GPU && !src1_uma;
const uint64_t x_ne = ne00 * ne01 * ne02;
const uint64_t y_ne = ne10 * ne11 * ne12;
ggml_vk_sync_buffers(subctx);
ggml_vk_dispatch_pipeline(ctx, subctx, ctx->pipeline_mul_mat_vec_p021_f16_f32, { { d_Qx, qx_buf_offset, qx_sz }, { d_Qy, qy_buffer_offset, qy_sz + qy_shader_offset }, { d_D, d_buffer_offset, d_sz + d_shader_offset } }, 6 * sizeof(uint32_t), &pc, { 1, (uint32_t)ne01, (uint32_t)ne12 });
- if (dst->backend == GGML_BACKEND_CPU) {
+ if (dst->backend == GGML_BACKEND_TYPE_CPU) {
// copy dst to host
float * d = (float *) dst->data;
ggml_vk_sync_buffers(subctx);
GGML_ASSERT(!ggml_is_transposed(src0));
GGML_ASSERT(!ggml_is_transposed(src1));
GGML_ASSERT(!ggml_is_permuted(src0));
- GGML_ASSERT(src0->backend == GGML_BACKEND_GPU);
+ GGML_ASSERT(src0->backend == GGML_BACKEND_TYPE_GPU);
GGML_ASSERT(src0->type == GGML_TYPE_F16);
GGML_ASSERT(src1->type == GGML_TYPE_F32);
src1_uma = d_Qy != nullptr;
}
- const bool load_y = src1->backend != GGML_BACKEND_GPU && !src1_uma;
+ const bool load_y = src1->backend != GGML_BACKEND_TYPE_GPU && !src1_uma;
const uint64_t d_ne = ne01 * ne11 * ne12;
ggml_vk_sync_buffers(subctx);
ggml_vk_dispatch_pipeline(ctx, subctx, ctx->pipeline_mul_mat_vec_nc_f16_f32, { { d_Qx, qx_buf_offset, qx_sz }, { d_Qy, qy_buffer_offset, qy_sz + qy_shader_offset }, { d_D, d_buffer_offset, d_sz + d_shader_offset } }, 7 * sizeof(uint32_t), &pc, { 1, (uint32_t)ne01, (uint32_t)ne12 });
- if (dst->backend == GGML_BACKEND_CPU) {
+ if (dst->backend == GGML_BACKEND_TYPE_CPU) {
// copy dst to host
float * d = (float *) dst->data;
ggml_vk_sync_buffers(subctx);
return (src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16 || ggml_is_quantized(src0->type)) &&
(src1->type == GGML_TYPE_F32 || src1->type == GGML_TYPE_F16 || ggml_is_quantized(src1->type)) &&
dst->type == GGML_TYPE_F32 &&
- ((ne0 >= 32 && ne1 >= 32 && ne10 >= 32) || src0->backend == GGML_BACKEND_GPU);
+ ((ne0 >= 32 && ne1 >= 32 && ne10 >= 32) || src0->backend == GGML_BACKEND_TYPE_GPU);
}
static void ggml_vk_mul_mat(ggml_backend_vk_context * ctx, vk_context * subctx, const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) {
// TODO: support for transposed / permuted tensors
GGML_ASSERT(nb0 == sizeof(float));
GGML_ASSERT(nb00 == sizeof(float));
- GGML_ASSERT(src0->backend == GGML_BACKEND_GPU);
- GGML_ASSERT(dst->backend == GGML_BACKEND_GPU);
+ GGML_ASSERT(src0->backend == GGML_BACKEND_TYPE_GPU);
+ GGML_ASSERT(dst->backend == GGML_BACKEND_TYPE_GPU);
ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) dst->extra;
ggml_tensor_extra_gpu * extra_src0 = (ggml_tensor_extra_gpu *) src0->extra;
}
}
- const bool transfer_src0 = src0->backend != GGML_BACKEND_GPU && !src0_uma;
- const bool transfer_src1 = use_src1 && src1->backend != GGML_BACKEND_GPU && !src1_uma;
+ const bool transfer_src0 = src0->backend != GGML_BACKEND_TYPE_GPU && !src0_uma;
+ const bool transfer_src1 = use_src1 && src1->backend != GGML_BACKEND_TYPE_GPU && !src1_uma;
uint64_t x_sz = ggml_vk_align_size(ggml_type_size(src0->type) * ne0, ctx->device.lock()->properties.limits.minStorageBufferOffsetAlignment);
uint64_t y_sz = use_src1 ? ggml_vk_align_size(ggml_type_size(src1->type) * ne1, ctx->device.lock()->properties.limits.minStorageBufferOffsetAlignment) : 0;
vk_buffer d_D = extra->buffer_gpu.lock();
// Workaround for tiny tensor inputs on ROPE
- if (use_src1 && src1->backend == GGML_BACKEND_GPU && y_sz > d_D->size) {
+ if (use_src1 && src1->backend == GGML_BACKEND_TYPE_GPU && y_sz > d_D->size) {
y_sz = VK_WHOLE_SIZE;
}
ggml_vk_sync_buffers(subctx);
ggml_vk_dispatch_pipeline(ctx, subctx, *pipeline, { { d_X, x_buf_offset, x_sz }, { d_D, d_buf_offset, d_sz } }, sizeof(PC), &pc, elements);
}
- if (dst->backend == GGML_BACKEND_CPU && op == GGML_OP_CPY) {
+ if (dst->backend == GGML_BACKEND_TYPE_CPU && op == GGML_OP_CPY) {
ggml_vk_d2h_tensor_2d(ctx, subctx, d_D, 0, dst);
- } else if(dst->backend == GGML_BACKEND_CPU) {
+ } else if(dst->backend == GGML_BACKEND_TYPE_CPU) {
// copy dst to host
float * d = (float *) dst->data;
ggml_vk_buffer_read_async(ctx, subctx, d_D, 0, d, d_sz);
ggml_vk_sync_buffers(subctx);
ggml_vk_dispatch_pipeline(ctx, subctx, *pipeline, { { d_X, x_buf_offset + x_offset, x_sz }, { d_D, d_buf_offset + d_offset, d_sz } }, sizeof(PC), &pc, elements);
}
- if (dst->backend == GGML_BACKEND_CPU) {
+ if (dst->backend == GGML_BACKEND_TYPE_CPU) {
// copy dst to host
ggml_vk_buffer_read_async(ctx, subctx, d_D, d_buf_offset + d_offset, (char *) dst->data + i02*nb2 + i03*nb3, d_sz);
}
static void ggml_vk_nop(ggml_backend_vk_context * ctx, vk_context * subctx, const ggml_tensor * src0, ggml_tensor * dst) {
// If backend is CPU, data from src0 has to be copied off the device
- if (dst->backend == GGML_BACKEND_CPU) {
+ if (dst->backend == GGML_BACKEND_TYPE_CPU) {
ggml_tensor_extra_gpu * extra_src0 = (ggml_tensor_extra_gpu *) src0->extra;
vk_buffer d_D = extra_src0->buffer_gpu.lock();
ggml_vk_sync_buffers(subctx);
#ifdef GGML_VULKAN_DEBUG
std::cerr << "ggml_vk_preallocate_buffers_graph(" << node << ")" << std::endl;
#endif
- const bool any_on_device = node->backend == GGML_BACKEND_GPU
- || (node->src[0] != nullptr && (node->src[0]->backend == GGML_BACKEND_GPU || node->src[0]->backend == GGML_BACKEND_GPU_SPLIT))
- || (node->src[1] != nullptr && (node->src[1]->backend == GGML_BACKEND_GPU));
+ const bool any_on_device = node->backend == GGML_BACKEND_TYPE_GPU
+ || (node->src[0] != nullptr && (node->src[0]->backend == GGML_BACKEND_TYPE_GPU || node->src[0]->backend == GGML_BACKEND_TYPE_GPU_SPLIT))
+ || (node->src[1] != nullptr && (node->src[1]->backend == GGML_BACKEND_TYPE_GPU));
if (ctx->disable || (!any_on_device && node->op != GGML_OP_MUL_MAT)) {
return;
}
static void ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_tensor * node, bool last_node){
- const bool any_on_device = node->backend == GGML_BACKEND_GPU
- || (node->src[0] != nullptr && (node->src[0]->backend == GGML_BACKEND_GPU || node->src[0]->backend == GGML_BACKEND_GPU_SPLIT))
- || (node->src[1] != nullptr && node->src[1]->backend == GGML_BACKEND_GPU);
+ const bool any_on_device = node->backend == GGML_BACKEND_TYPE_GPU
+ || (node->src[0] != nullptr && (node->src[0]->backend == GGML_BACKEND_TYPE_GPU || node->src[0]->backend == GGML_BACKEND_TYPE_GPU_SPLIT))
+ || (node->src[1] != nullptr && node->src[1]->backend == GGML_BACKEND_TYPE_GPU);
if (ctx->disable || (!any_on_device && node->op != GGML_OP_MUL_MAT) || (node->op == GGML_OP_MUL_MAT && !any_on_device && !ggml_vk_can_mul_mat(node->src[0], node->src[1], node))) {
return;
last_node = true;
#endif
- if (node->backend == GGML_BACKEND_CPU || last_node) {
+ if (node->backend == GGML_BACKEND_TYPE_CPU || last_node) {
ggml_vk_ctx_end(ctx->compute_ctx);
ctx->compute_ctx->exit_tensor = node;
ctx->compute_ctx = nullptr;
}
static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_compute_params * params, ggml_tensor * tensor){
- const bool any_on_device = tensor->backend == GGML_BACKEND_GPU
- || (tensor->src[0] != nullptr && (tensor->src[0]->backend == GGML_BACKEND_GPU || tensor->src[0]->backend == GGML_BACKEND_GPU_SPLIT))
- || (tensor->src[1] != nullptr && tensor->src[1]->backend == GGML_BACKEND_GPU);
+ const bool any_on_device = tensor->backend == GGML_BACKEND_TYPE_GPU
+ || (tensor->src[0] != nullptr && (tensor->src[0]->backend == GGML_BACKEND_TYPE_GPU || tensor->src[0]->backend == GGML_BACKEND_TYPE_GPU_SPLIT))
+ || (tensor->src[1] != nullptr && tensor->src[1]->backend == GGML_BACKEND_TYPE_GPU);
if (ctx->disable || (!any_on_device && tensor->op != GGML_OP_MUL_MAT)) {
return false;
if (params->ith != 0) {
return true;
}
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return true;
}
extra->offset = (uint8_t *) tensor->data - (uint8_t *) vk_ptr_base;
}
- tensor->backend = GGML_BACKEND_GPU;
+ tensor->backend = GGML_BACKEND_TYPE_GPU;
tensor->extra = extra;
}
#ifdef GGML_VULKAN_DEBUG
std::cerr << "ggml_backend_vk_buffer_set_tensor(" << buffer << ", " << tensor << ", " << data << ", " << offset << ", " << size << ")" << std::endl;
#endif
- GGML_ASSERT(tensor->backend == GGML_BACKEND_GPU);
+ GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_GPU);
ggml_backend_vk_buffer_context * ctx = (ggml_backend_vk_buffer_context *)buffer->context;
#ifdef GGML_VULKAN_DEBUG
std::cerr << "ggml_backend_vk_buffer_get_tensor(" << buffer << ", " << tensor << ", " << data << ", " << offset << ", " << size << ")" << std::endl;
#endif
- GGML_ASSERT(tensor->backend == GGML_BACKEND_GPU);
+ GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_GPU);
ggml_backend_vk_buffer_context * ctx = (ggml_backend_vk_buffer_context *)buffer->context;
#endif
ggml_backend_vk_context * ctx = (ggml_backend_vk_context *)backend->context;
GGML_ASSERT((tensor->buffer->buft == ggml_backend_vk_buffer_type(ctx->idx) || tensor->buffer->buft == ggml_backend_vk_host_buffer_type()) && "unsupported buffer type");
- GGML_ASSERT(tensor->backend == GGML_BACKEND_GPU);
+ GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_GPU);
ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) tensor->extra;
#endif
ggml_backend_vk_context * ctx = (ggml_backend_vk_context *)backend->context;
GGML_ASSERT((tensor->buffer->buft == ggml_backend_vk_buffer_type(ctx->idx) || tensor->buffer->buft == ggml_backend_vk_host_buffer_type()) && "unsupported buffer type");
- GGML_ASSERT(tensor->backend == GGML_BACKEND_GPU);
+ GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_GPU);
ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) tensor->extra;
int last_node = cgraph->n_nodes - 1;
// If the last op in the cgraph isn't backend GPU, the command buffer doesn't get closed properly
- while (last_node > 0 && cgraph->nodes[last_node]->backend != GGML_BACKEND_GPU) {
+ while (last_node > 0 && cgraph->nodes[last_node]->backend != GGML_BACKEND_TYPE_GPU) {
last_node -= 1;
}
}
ggml_compute_params params = {};
- params.type = GGML_TASK_COMPUTE;
+ params.type = GGML_TASK_TYPE_COMPUTE;
params.ith = 0;
for (int i = 0; i < cgraph->n_nodes; i++) {
ggml_tensor * node = cgraph->nodes[i];
static void ggml_vk_print_tensor(ggml_backend_vk_context * ctx, const ggml_tensor * tensor, const char * name) {
void * tensor_data = tensor->data;
- if (tensor->backend == GGML_BACKEND_GPU) {
+ if (tensor->backend == GGML_BACKEND_TYPE_GPU) {
const size_t tensor_size = ggml_nbytes(tensor);
tensor_data = malloc(tensor_size);
std::vector<const ggml_tensor *> done;
ggml_vk_print_graph_origin(tensor, done);
- if (tensor->backend == GGML_BACKEND_GPU) {
+ if (tensor->backend == GGML_BACKEND_TYPE_GPU) {
free(tensor_data);
}
}
static void ggml_vk_check_tensor(const std::string& name, const ggml_tensor * tensor) {
return;
- GGML_ASSERT(tensor->backend == GGML_BACKEND_CPU);
+ GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_CPU);
if (tensor->type != GGML_TYPE_F32 && tensor->type != GGML_TYPE_F16) {
return;
}
if (params->ith != 0) {
return;
}
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE || tensor->op == GGML_OP_TRANSPOSE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE || tensor->op == GGML_OP_TRANSPOSE) {
return;
}
src0_buffer = malloc(src0_size);
src0_clone->data = src0_buffer;
- if (src0->backend == GGML_BACKEND_CPU) {
+ if (src0->backend == GGML_BACKEND_TYPE_CPU) {
memcpy(src0_clone->data, src0->data, src0_size);
memcpy(src0_clone->nb, src0->nb, sizeof(size_t) * GGML_MAX_DIMS);
- } else if (src0->backend == GGML_BACKEND_GPU) {
+ } else if (src0->backend == GGML_BACKEND_TYPE_GPU) {
ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) src0->extra;
uint64_t offset = extra->offset;
if (!ggml_is_contiguous(src0) && ggml_vk_dim01_contiguous(src0)) {
src1_buffer = malloc(src1_size);
src1_clone->data = src1_buffer;
- if (src1->backend == GGML_BACKEND_CPU) {
+ if (src1->backend == GGML_BACKEND_TYPE_CPU) {
memcpy(src1_clone->data, src1->data, src1_size);
memcpy(src1_clone->nb, src1->nb, sizeof(size_t) * GGML_MAX_DIMS);
- } else if (src1->backend == GGML_BACKEND_GPU) {
+ } else if (src1->backend == GGML_BACKEND_TYPE_GPU) {
ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) src1->extra;
uint64_t offset = extra->offset;
if (!ggml_is_contiguous(src1) && ggml_vk_dim01_contiguous(src1)) {
if (params->ith != 0) {
return;
}
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE || tensor->op == GGML_OP_TRANSPOSE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE || tensor->op == GGML_OP_TRANSPOSE) {
return;
}
if (!(vk_output_tensor > 0 && vk_output_tensor == check_counter) && check_counter <= vk_skip_checks) {
void * tensor_data = tensor->data;
- if (tensor->backend == GGML_BACKEND_GPU) {
+ if (tensor->backend == GGML_BACKEND_TYPE_GPU) {
size_t tensor_size = ggml_nbytes(tensor);
tensor_data = malloc(tensor_size);
comp_result = nullptr;
comp_size = 0;
- if (tensor->backend == GGML_BACKEND_GPU) {
+ if (tensor->backend == GGML_BACKEND_TYPE_GPU) {
free(tensor_data);
}
}
}
}
- struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TENSOR, GGML_TENSOR_SIZE + obj_alloc_size);
+ struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TYPE_TENSOR, GGML_TENSOR_SIZE + obj_alloc_size);
// TODO: for recoverable errors, we would need to free the data allocated from the scratch buffer here
*result = (struct ggml_tensor) {
/*.type =*/ type,
- /*.backend =*/ GGML_BACKEND_CPU,
+ /*.backend =*/ GGML_BACKEND_TYPE_CPU,
/*.buffer =*/ NULL,
/*.ne =*/ { 1, 1, 1, 1 },
/*.nb =*/ { 0, 0, 0, 0 },
char * const mem_buffer = ctx->mem_buffer;
while (obj != NULL) {
- if (obj->type == GGML_OBJECT_TENSOR) {
+ if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
return (struct ggml_tensor *)(mem_buffer + obj->offs);
}
char * const mem_buffer = ctx->mem_buffer;
while (obj != NULL) {
- if (obj->type == GGML_OBJECT_TENSOR) {
+ if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
return (struct ggml_tensor *)(mem_buffer + obj->offs);
}
char * const mem_buffer = ctx->mem_buffer;
while (obj != NULL) {
- if (obj->type == GGML_OBJECT_TENSOR) {
+ if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
struct ggml_tensor * cur = (struct ggml_tensor *)(mem_buffer + obj->offs);
if (strcmp(cur->name, name) == 0) {
return cur;
int k) {
GGML_ASSERT(a->ne[0] >= k);
- struct ggml_tensor * result = ggml_argsort(ctx, a, GGML_SORT_DESC);
+ struct ggml_tensor * result = ggml_argsort(ctx, a, GGML_SORT_ORDER_DESC);
result = ggml_view_4d(ctx, result,
k, result->ne[1], result->ne[2], result->ne[3],
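// Usage sketch (illustrative, not part of this diff): ggml_top_k returns the indices of the
// k largest values per row as an I32 tensor, built from ggml_argsort with
// GGML_SORT_ORDER_DESC plus a k-column view as shown above. Sizes below are arbitrary.
#include "ggml.h"

static void top_k_example(void) {
    struct ggml_init_params ip = { /*.mem_size =*/ 16*1024*1024, /*.mem_buffer =*/ NULL, /*.no_alloc =*/ false };
    struct ggml_context * ctx = ggml_init(ip);

    struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
    for (int i = 0; i < 8; i++) {
        ggml_set_f32_1d(a, i, (float)(i % 5)); // values to rank
    }

    struct ggml_tensor * idx = ggml_top_k(ctx, a, 3); // indices of the 3 largest values

    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, idx);
    ggml_graph_compute_with_ctx(ctx, gf, /*n_threads =*/ 1);

    const int32_t * top = (const int32_t *) idx->data; // top[0..2] hold the winning indices
    (void) top;
    ggml_free(ctx);
}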
GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0));
GGML_ASSERT(src0->type == dst->type);
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
GGML_ASSERT(src0->type == dst->type);
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
const int nth = params->nth;
#ifdef GGML_USE_CLBLAST
- if (src1->backend == GGML_BACKEND_GPU) {
+ if (src1->backend == GGML_BACKEND_TYPE_GPU) {
// TODO: OpenCL kernel support full broadcast
GGML_ASSERT(ggml_can_repeat_rows(src1, src0));
if (ith == 0) {
GGML_ASSERT(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
GGML_ASSERT(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
GGML_ASSERT(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
GGML_ASSERT(ggml_are_same_shape(src0, dst));
GGML_ASSERT(ggml_is_scalar(src1));
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
GGML_ASSERT(ggml_are_same_shape(src0, dst));
GGML_ASSERT(ggml_is_scalar(src1));
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
GGML_ASSERT(ggml_are_same_shape(src0, dst));
GGML_ASSERT(ggml_is_scalar(src1));
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
GGML_ASSERT(ggml_are_same_shape(src0, dst));
GGML_ASSERT(ggml_is_scalar(src1));
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
size_t offset = ((int32_t *) dst->op_params)[3];
bool inplace = (bool) ((int32_t *) dst->op_params)[4];
- if (!inplace && (params->type == GGML_TASK_INIT)) {
+ if (!inplace && (params->type == GGML_TASK_TYPE_INIT)) {
if (params->ith != 0) {
return;
}
ggml_nbytes(dst));
}
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
assert(params->ith == 0);
assert(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
const int ith = params->ith;
const int nth = params->nth;
#if defined(GGML_USE_CLBLAST)
- if (src1->backend == GGML_BACKEND_GPU) {
+ if (src1->backend == GGML_BACKEND_TYPE_GPU) {
// TODO: OpenCL kernel support full broadcast
GGML_ASSERT(ggml_can_repeat_rows(src1, src0));
if (ith == 0) {
GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
assert(params->ith == 0);
assert(ggml_are_same_shape(src0, dst));
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
assert(params->ith == 0);
assert(ggml_are_same_shape(src0, dst));
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
GGML_ASSERT(params->ith == 0);
GGML_ASSERT(ggml_are_same_shape(src0, dst));
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
assert(params->ith == 0);
assert(ggml_is_scalar(dst));
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
assert(params->ith == 0);
assert(ggml_is_scalar(dst));
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
GGML_ASSERT(params->ith == 0);
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
assert(params->ith == 0);
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
assert(params->ith == 0);
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
GGML_ASSERT(params->ith == 0);
GGML_ASSERT(ggml_can_repeat(src0, dst));
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
GGML_ASSERT(params->ith == 0);
GGML_ASSERT(ggml_can_repeat(src0, dst));
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
GGML_ASSERT(params->ith == 0);
GGML_ASSERT(ggml_can_repeat(dst, src0));
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
const struct ggml_tensor * src0 = dst->src[0];
const struct ggml_tensor * src1 = dst->src[1];
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
assert(params->ith == 0);
assert(ggml_are_same_shape(src0, dst));
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
assert(params->ith == 0);
assert(ggml_are_same_shape(src0, dst));
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
assert(params->ith == 0);
assert(ggml_are_same_shape(src0, dst));
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
assert(params->ith == 0);
assert(ggml_are_same_shape(src0, dst));
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
assert(params->ith == 0);
assert(ggml_are_same_shape(src0, dst));
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
assert(params->ith == 0);
assert(ggml_are_same_shape(src0, dst));
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
assert(params->ith == 0);
assert(ggml_are_same_shape(src0, dst));
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
GGML_ASSERT(ggml_are_same_shape(src0, dst));
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
GGML_ASSERT(ggml_are_same_shape(src0, dst));
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
GGML_ASSERT(ggml_are_same_shape(src0, dst));
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
assert(params->ith == 0);
assert(ggml_are_same_shape(src0, dst));
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
GGML_ASSERT(ggml_are_same_shape(src0, dst));
GGML_ASSERT(ggml_are_same_shape(src0, grad));
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
assert(params->ith == 0);
assert(ggml_are_same_shape(src0, dst));
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
assert(params->ith == 0);
assert(ggml_are_same_shape(src0, dst));
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
GGML_ASSERT(ggml_are_same_shape(src0, dst));
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
GGML_ASSERT(ggml_are_same_shape(src0, dst));
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
GGML_ASSERT(ggml_are_same_shape(src0, dst) && ggml_are_same_shape(src0, src1));
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
GGML_ASSERT(ggml_are_same_shape(src0, dst));
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
#if defined(GGML_USE_CLBLAST)
if (ggml_cl_can_mul_mat(src0, src1, dst)) {
- if (params->ith == 0 && params->type == GGML_TASK_COMPUTE) {
+ if (params->ith == 0 && params->type == GGML_TASK_TYPE_COMPUTE) {
ggml_cl_mul_mat(src0, src1, dst, params->wdata, params->wsize);
}
return;
const size_t desired_wsize = ne13*ne12*ne_plane*sizeof(float);
UNUSED(desired_wsize);
- if (params->type == GGML_TASK_INIT) {
+ if (params->type == GGML_TASK_TYPE_INIT) {
if (type != GGML_TYPE_F32) {
assert(params->wsize >= desired_wsize);
// parallelize by src0 rows
return;
}
- if (params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
}
#endif
- if (params->type == GGML_TASK_INIT) {
+ if (params->type == GGML_TASK_TYPE_INIT) {
if (ith != 0) {
return;
}
return;
}
- if (params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
#define MMID_MATRIX_ROW(row_id, i1) matrix_rows[(row_id)*ne11 + (i1)]
- if (params->type == GGML_TASK_INIT) {
+ if (params->type == GGML_TASK_TYPE_INIT) {
if (ith != 0) {
return;
}
return;
}
- if (params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
(ggml_is_contiguous(src1) || ggml_is_transposed(src1));
#endif
- if (params->type == GGML_TASK_INIT) {
+ if (params->type == GGML_TASK_TYPE_INIT) {
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) // gemm beta will zero dst
if (use_blas) {
return;
return;
}
- if (params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
// TODO: #if defined(GGML_USE_CUBLAS) ggml_cuda_out_prod
// TODO: #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST)
- if (params->type == GGML_TASK_INIT) {
+ if (params->type == GGML_TASK_TYPE_INIT) {
if (ith != 0) {
return;
}
return;
}
- if (params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
GGML_ASSERT(ggml_is_contiguous(dst));
GGML_ASSERT(ggml_are_same_shape(src0, dst));
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
size_t offset = ((int32_t *) dst->op_params)[3];
bool inplace = (bool) ((int32_t *) dst->op_params)[4];
- if (!inplace && (params->type == GGML_TASK_INIT)) {
+ if (!inplace && (params->type == GGML_TASK_TYPE_INIT)) {
if (params->ith != 0) {
return;
}
ggml_nbytes(dst));
}
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
assert(params->ith == 0);
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
assert(params->ith == 0);
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
assert(params->ith == 0);
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
// ggml_compute_forward_dup_same_cont(params, opt0, dst);
- if (params->type == GGML_TASK_INIT) {
+ if (params->type == GGML_TASK_TYPE_INIT) {
if (params->ith != 0) {
return;
}
memset(dst->data, 0, ggml_nbytes(dst));
}
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
// ggml_compute_forward_dup_same_cont(params, opt0, dst);
- if (params->type == GGML_TASK_INIT) {
+ if (params->type == GGML_TASK_TYPE_INIT) {
if (params->ith != 0) {
return;
}
memset(dst->data, 0, ggml_nbytes(dst));
}
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
GGML_ASSERT(params->ith == 0);
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
GGML_ASSERT(n_past >= 0);
- if (!inplace && (params->type == GGML_TASK_INIT)) {
+ if (!inplace && (params->type == GGML_TASK_TYPE_INIT)) {
if (ith != 0) {
return;
}
ggml_nbytes(dst));
}
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
assert(ggml_is_contiguous(dst));
assert(ggml_are_same_shape(src0, dst));
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
GGML_ASSERT(ggml_are_same_shape(src0, dst));
GGML_ASSERT(ggml_are_same_shape(src1, dst));
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
assert(params->ith == 0);
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
assert(params->ith == 0);
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
assert(params->ith == 0);
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
const struct ggml_tensor * src0 = dst->src[0];
const struct ggml_tensor * src1 = dst->src[1];
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
const struct ggml_tensor * src0 = dst->src[0];
const struct ggml_tensor * src1 = dst->src[1];
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
GGML_ASSERT(nb10 == sizeof(float));
- if (params->type == GGML_TASK_INIT) {
+ if (params->type == GGML_TASK_TYPE_INIT) {
if (ith != 0) {
return;
}
return;
}
- if (params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
GGML_ASSERT(nb00 == sizeof(float));
GGML_ASSERT(nb10 == sizeof(float));
- if (params->type == GGML_TASK_INIT) {
+ if (params->type == GGML_TASK_TYPE_INIT) {
if (ith != 0) {
return;
}
return;
}
- if (params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
GGML_ASSERT(nb10 == sizeof(float));
- if (params->type == GGML_TASK_INIT) {
+ if (params->type == GGML_TASK_TYPE_INIT) {
return;
}
- if (params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
GGML_ASSERT(nb10 == sizeof(float));
- if (params->type == GGML_TASK_INIT) {
+ if (params->type == GGML_TASK_TYPE_INIT) {
return;
}
- if (params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
GGML_ASSERT(nb10 == sizeof(float));
- if (params->type == GGML_TASK_INIT) {
+ if (params->type == GGML_TASK_TYPE_INIT) {
if (ith != 0) {
return;
}
return;
}
- if (params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
assert(src->type == GGML_TYPE_F32);
assert(params->ith == 0);
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
GGML_ASSERT(src->type == GGML_TYPE_F32);
GGML_ASSERT(params->ith == 0);
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
const struct ggml_tensor * src0 = dst->src[0];
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
const struct ggml_tensor * src0 = dst->src[0];
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
const struct ggml_tensor * src0 = dst->src[0];
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
// C doesn't have a functional sort, so we do a bubble sort instead
for (int64_t j = 0; j < ne0; j++) {
for (int64_t k = j + 1; k < ne0; k++) {
- if ((order == GGML_SORT_ASC && src_data[dst_data[j]] > src_data[dst_data[k]]) ||
- (order == GGML_SORT_DESC && src_data[dst_data[j]] < src_data[dst_data[k]])) {
+ if ((order == GGML_SORT_ORDER_ASC && src_data[dst_data[j]] > src_data[dst_data[k]]) ||
+ (order == GGML_SORT_ORDER_DESC && src_data[dst_data[j]] < src_data[dst_data[k]])) {
int32_t tmp = dst_data[j];
dst_data[j] = dst_data[k];
dst_data[k] = tmp;
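// Standalone illustration (not part of ggml): the same O(n^2) bubble-sort argsort over a
// plain float array, to make the GGML_SORT_ORDER_ASC / GGML_SORT_ORDER_DESC comparison
// above easier to follow in isolation.
#include <stdint.h>

enum sort_order { SORT_ASC, SORT_DESC }; // local stand-ins for GGML_SORT_ORDER_*

static void argsort_bubble(const float * src, int32_t * idx, int64_t n, enum sort_order order) {
    for (int64_t i = 0; i < n; i++) {
        idx[i] = (int32_t) i;
    }
    for (int64_t j = 0; j < n; j++) {
        for (int64_t k = j + 1; k < n; k++) {
            if ((order == SORT_ASC  && src[idx[j]] > src[idx[k]]) ||
                (order == SORT_DESC && src[idx[j]] < src[idx[k]])) {
                const int32_t tmp = idx[j];
                idx[j] = idx[k];
                idx[k] = tmp;
            }
        }
    }
}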
GGML_ASSERT(nb1 <= nb2);
GGML_ASSERT(nb2 <= nb3);
- if (params->type == GGML_TASK_INIT) {
+ if (params->type == GGML_TASK_TYPE_INIT) {
return;
}
- if (params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
GGML_ASSERT(nb1 <= nb2);
GGML_ASSERT(nb2 <= nb3);
- if (params->type == GGML_TASK_INIT) {
+ if (params->type == GGML_TASK_TYPE_INIT) {
return;
}
- if (params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
GGML_ASSERT(nb1 <= nb2);
GGML_ASSERT(nb2 <= nb3);
- if (params->type == GGML_TASK_INIT) {
+ if (params->type == GGML_TASK_TYPE_INIT) {
return;
}
- if (params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
GGML_ASSERT(nb1 <= nb2);
GGML_ASSERT(nb2 <= nb3);
- if (params->type == GGML_TASK_INIT) {
+ if (params->type == GGML_TASK_TYPE_INIT) {
if (ith == 0) {
memset(dst->data, 0, nb0*ne0*ne1*ne2*ne3);
}
return;
}
- if (params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
const struct ggml_tensor * src0 = dst->src[0];
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
const struct ggml_tensor * src0 = dst->src[0];
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
const struct ggml_tensor * src0 = dst->src[0];
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
const struct ggml_tensor * src2 = dst->src[2];
const bool inplace = (bool) ((int32_t *) dst->op_params)[0];
- if (!inplace && params->type == GGML_TASK_INIT) {
+ if (!inplace && params->type == GGML_TASK_TYPE_INIT) {
if (params->ith != 0) {
return;
}
memcpy((char *) dst->data, (char *) src0->data, ggml_nbytes(dst));
return;
}
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
GGML_ASSERT(ggml_are_same_shape(src0, dst));
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
assert(params->ith == 0);
assert(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
assert(params->ith == 0);
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
assert(params->ith == 0);
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
assert(params->ith == 0);
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
const struct ggml_tensor * a = dst->src[0];
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
const struct ggml_tensor * a = dst->src[0];
const struct ggml_tensor * b = dst->src[1];
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
const struct ggml_tensor * b = dst->src[1];
const struct ggml_tensor * c = dst->src[2];
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
GGML_ASSERT(params->wsize >= sizeof(float) * (nth + nth * nc));
- if (params->type == GGML_TASK_INIT) {
+ if (params->type == GGML_TASK_TYPE_INIT) {
if (ith == 0) {
memset(sums, 0, sizeof(float) * (nth + nth * nc));
}
return;
}
- if (params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_FINALIZE) {
if (ith == 0) {
float * dp = (float *) dst->data;
ggml_vec_sum_f32(nth, dp, sums);
const int64_t ith = params->ith;
const int64_t nth = params->nth;
- if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return;
}
if (skip_cpu) {
return;
}
- GGML_ASSERT(tensor->src[0] == NULL || tensor->src[0]->backend == GGML_BACKEND_CPU);
- GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend == GGML_BACKEND_CPU);
+ GGML_ASSERT(tensor->src[0] == NULL || tensor->src[0]->backend == GGML_BACKEND_TYPE_CPU);
+ GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend == GGML_BACKEND_TYPE_CPU);
#elif defined(GGML_USE_VULKAN)
const bool skip_cpu = ggml_vk_compute_forward_cpu_assist(params, tensor);
#ifdef GGML_VULKAN_CHECK_RESULTS
if (skip_cpu) {
return;
}
- GGML_ASSERT(tensor->src[0] == NULL || tensor->src[0]->backend == GGML_BACKEND_CPU);
- GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend == GGML_BACKEND_CPU);
+ GGML_ASSERT(tensor->src[0] == NULL || tensor->src[0]->backend == GGML_BACKEND_TYPE_CPU);
+ GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend == GGML_BACKEND_TYPE_CPU);
#endif // GGML_USE_CUBLAS
#ifdef GGML_USE_SYCL
struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t size, bool grads) {
const size_t obj_size = ggml_graph_nbytes(size, grads);
- struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_GRAPH, obj_size);
+ struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_GRAPH, obj_size);
struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs);
struct ggml_tensor ** data_start = (struct ggml_tensor **) (cgraph + 1);
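// Usage sketch (illustrative, not part of this diff): ggml_new_graph_custom places the
// cgraph in the context as a GGML_OBJECT_TYPE_GRAPH object; pass a larger node capacity and
// grads=true when the graph will be used for backpropagation. The 4x factor is arbitrary.
static struct ggml_cgraph * make_training_graph(struct ggml_context * ctx) {
    return ggml_new_graph_custom(ctx, 4*GGML_DEFAULT_GRAPH_SIZE, /*grads =*/ true);
}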
set_numa_thread_affinity(state->ith);
int node_n = -1;
- int task_phase = GGML_TASK_FINALIZE;
+ int task_phase = GGML_TASK_TYPE_FINALIZE;
while (true) {
if (cplan->abort_callback && cplan->abort_callback(cplan->abort_callback_data)) {
// all other threads are finished and spinning
// do finalize and init here so we don't have synchronize again
struct ggml_compute_params params = {
- /*.type =*/ GGML_TASK_FINALIZE,
+ /*.type =*/ GGML_TASK_TYPE_FINALIZE,
/*.ith =*/ 0,
/*.nth =*/ 0,
/*.wsize =*/ cplan->work_size,
if (n_tasks == 1) {
/* INIT */
if (GGML_OP_HAS_INIT[node->op]) {
- params.type = GGML_TASK_INIT;
+ params.type = GGML_TASK_TYPE_INIT;
ggml_compute_forward(¶ms, node);
}
// TODO: maybe push node_n to the atomic but if other threads see n_tasks is 1,
// they do something more efficient than spinning (?)
- params.type = GGML_TASK_COMPUTE;
+ params.type = GGML_TASK_TYPE_COMPUTE;
ggml_compute_forward(¶ms, node);
if (GGML_OP_HAS_FINALIZE[node->op]) {
- params.type = GGML_TASK_FINALIZE;
+ params.type = GGML_TASK_TYPE_FINALIZE;
ggml_compute_forward(¶ms, node);
}
}
}
- task_phase = GGML_TASK_INIT;
+ task_phase = GGML_TASK_TYPE_INIT;
atomic_store(&state->shared->n_active, n_threads);
atomic_store(&state->shared->node_n, node_n);
atomic_store(&state->shared->node_task, task_phase);
const int n_tasks = ggml_get_n_tasks(node, n_threads);
struct ggml_compute_params params = {
- /*.type =*/ GGML_TASK_INIT,
+ /*.type =*/ GGML_TASK_TYPE_INIT,
/*.ith =*/ state->ith,
/*.nth =*/ n_tasks,
/*.wsize =*/ cplan->work_size,
}
if (atomic_fetch_sub(&state->shared->n_active, 1) == 1) {
- task_phase = GGML_TASK_COMPUTE;
+ task_phase = GGML_TASK_TYPE_COMPUTE;
atomic_store(&state->shared->n_active, n_threads);
atomic_store(&state->shared->node_task, task_phase);
}
}
if (state->ith < n_tasks) {
- params.type = GGML_TASK_COMPUTE;
+ params.type = GGML_TASK_TYPE_COMPUTE;
ggml_compute_forward(¶ms, node);
}
if (atomic_fetch_sub(&state->shared->n_active, 1) == 1) {
- task_phase = GGML_TASK_FINALIZE;
+ task_phase = GGML_TASK_TYPE_FINALIZE;
atomic_store(&state->shared->n_active, n_threads);
atomic_store(&state->shared->node_task, task_phase);
}
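// Illustration only (not the real scheduler): the loop above walks every node through
// GGML_TASK_TYPE_INIT (single-threaded setup), GGML_TASK_TYPE_COMPUTE (all threads), and
// GGML_TASK_TYPE_FINALIZE (reduction). A serial sketch of that contract for a sum op,
// using local stand-ins for the enum values:
enum task_type { TASK_INIT, TASK_COMPUTE, TASK_FINALIZE };

static void sum_pass(enum task_type type, int ith, int nth,
                     float * partial, const float * x, int n, float * out) {
    switch (type) {
        case TASK_INIT:     if (ith == 0) { for (int i = 0; i < nth; i++) { partial[i] = 0.0f; } } break;
        case TASK_COMPUTE:  for (int i = ith; i < n; i += nth) { partial[ith] += x[i]; }           break;
        case TASK_FINALIZE: if (ith == 0) { *out = 0.0f; for (int i = 0; i < nth; i++) { *out += partial[i]; } } break;
    }
}

static float sum_with_phases(const float * x, int n, int nth, float * partial) {
    float out = 0.0f;
    sum_pass(TASK_INIT, 0, nth, partial, x, n, &out);     // INIT runs once
    for (int ith = 0; ith < nth; ith++) {                 // COMPUTE runs for every thread index
        sum_pass(TASK_COMPUTE, ith, nth, partial, x, n, &out);
    }
    sum_pass(TASK_FINALIZE, 0, nth, partial, x, n, &out); // FINALIZE runs once
    return out;
}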
/*.n_threads =*/ n_threads,
/*.n_active =*/ n_threads,
/*.node_n =*/ -1,
- /*.node_task =*/ GGML_TASK_FINALIZE,
+ /*.node_task =*/ GGML_TASK_TYPE_FINALIZE,
/*.abort_callback =*/ NULL,
/*.abort_callback_data =*/ NULL,
};
void ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads) {
struct ggml_cplan cplan = ggml_graph_plan(cgraph, n_threads);
- struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_WORK_BUFFER, cplan.work_size);
+ struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_WORK_BUFFER, cplan.work_size);
cplan.work_data = (uint8_t *)ctx->mem_buffer + obj->offs;
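// Sketch (assumption-level, not from this patch): ggml_graph_compute_with_ctx reserves the
// work buffer as a GGML_OBJECT_TYPE_WORK_BUFFER object inside the same context, so the
// context needs headroom for it. The manual alternative plans the graph and supplies the
// buffer yourself:
#include <stdlib.h>
#include "ggml.h"

static void compute_with_own_work_buffer(struct ggml_cgraph * gf, int n_threads) {
    struct ggml_cplan cplan = ggml_graph_plan(gf, n_threads);
    uint8_t * work = NULL;
    if (cplan.work_size > 0) {
        work = malloc(cplan.work_size);
        cplan.work_data = work;
    }
    ggml_graph_compute(gf, &cplan);
    free(work);
}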
float * pf = params.past > 0 ? opt->adam.pf->data : NULL; // past function values
struct ggml_cplan cplan = ggml_graph_plan(gb, params.n_threads);
- struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_WORK_BUFFER, cplan.work_size);
+ struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_WORK_BUFFER, cplan.work_size);
cplan.work_data = (uint8_t *)ctx->mem_buffer + obj->offs;
bool cancel = false;
if (callback) {
callback(callback_data, accum_step, &sched, &cancel);
if (cancel) {
- return GGML_OPT_CANCEL;
+ return GGML_OPT_RESULT_CANCEL;
}
}
// ggml_graph_reset (gf);
if (callback) {
callback(callback_data, accum_step, &sched, &cancel);
if (cancel) {
- return GGML_OPT_CANCEL;;
+ return GGML_OPT_RESULT_CANCEL;
}
}
// ggml_graph_reset (gf);
if (fabsf(fx - fx_prev[0])/fx < params.adam.eps_f) {
GGML_PRINT_DEBUG("converged\n");
- return GGML_OPT_OK;
+ return GGML_OPT_RESULT_OK;
}
// delta-based convergence test
const float rate = (pf[(iter0 + t)%params.past] - fx)/fx;
if (fabsf(rate) < params.delta) {
- return GGML_OPT_OK;
+ return GGML_OPT_RESULT_OK;
}
}
++n_no_improvement[0];
if (n_no_improvement[0] >= params.max_no_improvement) {
- return GGML_OPT_OK;
+ return GGML_OPT_RESULT_OK;
}
}
}
}
}
- return GGML_OPT_DID_NOT_CONVERGE;
+ return GGML_OPT_RESULT_DID_NOT_CONVERGE;
}
//
float sched = 0;
callback(callback_data, accum_step, &sched, cancel);
if (*cancel) {
- return GGML_OPT_CANCEL;
+ return GGML_OPT_RESULT_CANCEL;
}
}
// ggml_graph_reset (gf);
if (params.lbfgs.linesearch == GGML_LINESEARCH_BACKTRACKING_WOLFE ||
params.lbfgs.linesearch == GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE) {
if (params.lbfgs.wolfe <= params.lbfgs.ftol || 1.f <= params.lbfgs.wolfe) {
- return GGML_OPT_INVALID_WOLFE;
+ return GGML_OPT_RESULT_INVALID_WOLFE;
}
}
}
struct ggml_cplan cplan = ggml_graph_plan(gb, params.n_threads);
- struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_WORK_BUFFER, cplan.work_size);
+ struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_WORK_BUFFER, cplan.work_size);
cplan.work_data = (uint8_t *)ctx->mem_buffer + obj->offs;
float * x = opt->lbfgs.x->data; // current parameters
float sched = 0;
callback(callback_data, accum_step, &sched, &cancel);
if (cancel) {
- return GGML_OPT_CANCEL;
+ return GGML_OPT_RESULT_CANCEL;
}
}
// ggml_graph_reset (gf);
// already optimized
if (gnorm/xnorm <= params.lbfgs.eps) {
- return GGML_OPT_OK;
+ return GGML_OPT_RESULT_OK;
}
if (opt->just_initialized) {
// way to test and don't want to break something with so many changes lined up
ls = linesearch_backtracking(¶ms, nx, x, &fx, g, d, step, xp, f, gb, &cplan, np, ps, &cancel, callback, callback_data);
if (cancel) {
- return GGML_OPT_CANCEL;
+ return GGML_OPT_RESULT_CANCEL;
}
if (ls < 0) {
}
if (gnorm/xnorm <= params.lbfgs.eps) {
// converged
- return GGML_OPT_OK;
+ return GGML_OPT_RESULT_OK;
}
// delta-based convergence test
const float rate = (pf[k[0]%params.past] - fx)/fx;
if (fabsf(rate) < params.delta) {
- return GGML_OPT_OK;
+ return GGML_OPT_RESULT_OK;
}
}
n_no_improvement[0]++;
if (n_no_improvement[0] >= params.max_no_improvement) {
- return GGML_OPT_OK;
+ return GGML_OPT_RESULT_OK;
}
}
}
if (params.lbfgs.n_iter != 0 && params.lbfgs.n_iter < it + 1) {
// reached the maximum number of iterations
- return GGML_OPT_DID_NOT_CONVERGE;
+ return GGML_OPT_RESULT_DID_NOT_CONVERGE;
}
// update vectors s and y:
GGML_ASSERT(false && "lbfgs failed");
- return GGML_OPT_DID_NOT_CONVERGE;
+ return GGML_OPT_RESULT_DID_NOT_CONVERGE;
}
struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type) {
struct ggml_opt_params result;
switch (type) {
- case GGML_OPT_ADAM:
+ case GGML_OPT_TYPE_ADAM:
{
result = (struct ggml_opt_params) {
- .type = GGML_OPT_ADAM,
+ .type = GGML_OPT_TYPE_ADAM,
.graph_size = GGML_DEFAULT_GRAPH_SIZE,
.n_threads = 1, // FIXME: GGML_DEFAULT_N_THREADS ?
.past = 0,
},
};
} break;
- case GGML_OPT_LBFGS:
+ case GGML_OPT_TYPE_LBFGS:
{
result = (struct ggml_opt_params) {
- .type = GGML_OPT_LBFGS,
+ .type = GGML_OPT_TYPE_LBFGS,
.graph_size = GGML_DEFAULT_GRAPH_SIZE,
.n_threads = 1,
.past = 0,
opt->just_initialized = true;
if (opt->ctx == NULL) {
struct ggml_init_params ctx_opt_params;
- if (opt->params.type == GGML_OPT_ADAM) {
+ if (opt->params.type == GGML_OPT_TYPE_ADAM) {
ctx_opt_params.mem_size = GGML_MEM_ALIGN*3 + ggml_tensor_overhead()*3 + ggml_type_size(GGML_TYPE_F32)*nx*3;
if (opt->params.past > 0) {
ctx_opt_params.mem_size += GGML_MEM_ALIGN + ggml_tensor_overhead() + ggml_type_size(GGML_TYPE_F32)*opt->params.past;
}
- } else if (opt->params.type == GGML_OPT_LBFGS) {
+ } else if (opt->params.type == GGML_OPT_TYPE_LBFGS) {
ctx_opt_params.mem_size = GGML_MEM_ALIGN*9 + ggml_tensor_overhead()*9 + ggml_type_size(GGML_TYPE_F32)*(nx*5 + opt->params.lbfgs.m*2 + nx*opt->params.lbfgs.m*2);
if (opt->params.past > 0) {
ctx_opt_params.mem_size += GGML_MEM_ALIGN + ggml_tensor_overhead() + ggml_type_size(GGML_TYPE_F32)*opt->params.past;
opt->ctx = ggml_init(ctx_opt_params);
}
switch (opt->params.type) {
- case GGML_OPT_ADAM:
+ case GGML_OPT_TYPE_ADAM:
{
opt->adam.g = ggml_new_tensor_1d(opt->ctx, GGML_TYPE_F32, nx);
opt->adam.m = ggml_new_tensor_1d(opt->ctx, GGML_TYPE_F32, nx);
ggml_set_zero(opt->adam.pf);
}
} break;
- case GGML_OPT_LBFGS:
+ case GGML_OPT_TYPE_LBFGS:
{
opt->lbfgs.x = ggml_new_tensor_1d(opt->ctx, GGML_TYPE_F32, nx);
opt->lbfgs.xp = ggml_new_tensor_1d(opt->ctx, GGML_TYPE_F32, nx);
ctx = ggml_init(params_ctx);
if (ctx == NULL) {
- return GGML_OPT_NO_CONTEXT;
+ return GGML_OPT_RESULT_NO_CONTEXT;
}
free_ctx = true;
}
- enum ggml_opt_result result = GGML_OPT_OK;
+ enum ggml_opt_result result = GGML_OPT_RESULT_OK;
struct ggml_opt_context * opt = (struct ggml_opt_context *) alloca(sizeof(struct ggml_opt_context));
void * callback_data) {
// build forward + backward compute graphs
- enum ggml_opt_result result = GGML_OPT_OK;
+ enum ggml_opt_result result = GGML_OPT_RESULT_OK;
switch (opt->params.type) {
- case GGML_OPT_ADAM:
+ case GGML_OPT_TYPE_ADAM:
{
result = ggml_opt_adam(ctx, opt, opt->params, f, gf, gb, callback, callback_data);
} break;
- case GGML_OPT_LBFGS:
+ case GGML_OPT_TYPE_LBFGS:
{
result = ggml_opt_lbfgs(ctx, opt, opt->params, f, gf, gb, callback, callback_data);
} break;
};
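// Usage sketch (assumption-level, not from this patch): selecting an optimizer type and
// checking the renamed result codes. f is assumed to be a scalar loss tensor built in ctx
// from the parameters to optimize.
#include "ggml.h"

static bool minimize(struct ggml_context * ctx, struct ggml_tensor * f) {
    struct ggml_opt_params params = ggml_opt_default_params(GGML_OPT_TYPE_ADAM);
    params.adam.n_iter = 100; // hypothetical iteration budget for the example

    enum ggml_opt_result res = ggml_opt(ctx, params, f);
    return res == GGML_OPT_RESULT_OK || res == GGML_OPT_RESULT_DID_NOT_CONVERGE;
}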
enum ggml_backend_type {
- GGML_BACKEND_CPU = 0,
- GGML_BACKEND_GPU = 10,
- GGML_BACKEND_GPU_SPLIT = 20,
+ GGML_BACKEND_TYPE_CPU = 0,
+ GGML_BACKEND_TYPE_GPU = 10,
+ GGML_BACKEND_TYPE_GPU_SPLIT = 20,
};
// model file types
};
enum ggml_object_type {
- GGML_OBJECT_TENSOR,
- GGML_OBJECT_GRAPH,
- GGML_OBJECT_WORK_BUFFER
+ GGML_OBJECT_TYPE_TENSOR,
+ GGML_OBJECT_TYPE_GRAPH,
+ GGML_OBJECT_TYPE_WORK_BUFFER
};
enum ggml_log_level {
// NOTE: the INIT or FINALIZE pass is not scheduled unless explicitly enabled.
// This behavior was changed since https://github.com/ggerganov/llama.cpp/pull/1995.
enum ggml_task_type {
- GGML_TASK_INIT = 0,
- GGML_TASK_COMPUTE,
- GGML_TASK_FINALIZE,
+ GGML_TASK_TYPE_INIT = 0,
+ GGML_TASK_TYPE_COMPUTE,
+ GGML_TASK_TYPE_FINALIZE,
};
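// Sketch of the op-side contract seen throughout this patch (illustrative; the op name is
// hypothetical and contiguous F32 tensors are assumed): ops with no INIT/FINALIZE work
// return early for those phases and split the COMPUTE phase across threads via
// params->ith / params->nth.
static void ggml_compute_forward_example(const struct ggml_compute_params * params, struct ggml_tensor * dst) {
    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
        return;
    }
    const struct ggml_tensor * src0 = dst->src[0];
    const int64_t n = ggml_nelements(dst);
    for (int64_t i = params->ith; i < n; i += params->nth) {
        ((float *) dst->data)[i] = ((float *) src0->data)[i]; // e.g. an element-wise copy
    }
}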
struct ggml_compute_params {
// sort rows
enum ggml_sort_order {
- GGML_SORT_ASC,
- GGML_SORT_DESC,
+ GGML_SORT_ORDER_ASC,
+ GGML_SORT_ORDER_DESC,
};
GGML_API struct ggml_tensor * ggml_argsort(
// optimization methods
enum ggml_opt_type {
- GGML_OPT_ADAM,
- GGML_OPT_LBFGS,
+ GGML_OPT_TYPE_ADAM,
+ GGML_OPT_TYPE_LBFGS,
};
// linesearch methods
// optimization return values
enum ggml_opt_result {
- GGML_OPT_OK = 0,
- GGML_OPT_DID_NOT_CONVERGE,
- GGML_OPT_NO_CONTEXT,
- GGML_OPT_INVALID_WOLFE,
- GGML_OPT_FAIL,
- GGML_OPT_CANCEL,
+ GGML_OPT_RESULT_OK = 0,
+ GGML_OPT_RESULT_DID_NOT_CONVERGE,
+ GGML_OPT_RESULT_NO_CONTEXT,
+ GGML_OPT_RESULT_INVALID_WOLFE,
+ GGML_OPT_RESULT_FAIL,
+ GGML_OPT_RESULT_CANCEL,
GGML_LINESEARCH_FAIL = -128,
GGML_LINESEARCH_MINIMUM_STEP,