tags
.build/
build*
+release
+debug
!build-info.cmake
!build-info.cpp.in
!build-info.sh
_(NOTE: this guideline is yet to be applied to the `llama.cpp` codebase. New code should follow this guideline.)_
-- Try to follow the existing patterns in the code (indentation, spaces, etc.). In case of doubt use `clang-format` to format the added code
+- Try to follow the existing patterns in the code (indentation, spaces, etc.). In case of doubt use `clang-format` (from clang-tools v15+) to format the added code
- For anything not covered in the current guidelines, refer to the [C++ Core Guidelines](https://isocpp.github.io/CppCoreGuidelines/CppCoreGuidelines)
- Tensors store data in row-major order. We refer to dimension 0 as columns, 1 as rows, 2 as matrices
- Matrix multiplication is unconventional: [`C = ggml_mul_mat(ctx, A, B)`](https://github.com/ggml-org/llama.cpp/blob/880e352277fc017df4d5794f0c21c44e1eae2b84/ggml.h#L1058-L1064) means $C^T = A B^T \Leftrightarrow C = B A^T.$
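  For example (illustration only, not from the codebase): a minimal sketch of the shape convention above, using only the public `ggml.h` API:

```c
#include <ggml.h>

int main(void) {
    struct ggml_init_params params = {
        /* .mem_size   = */ 16*1024*1024,
        /* .mem_buffer = */ NULL,
        /* .no_alloc   = */ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    // A: ne = [K, M] -> M rows of K columns (row-major)
    // B: ne = [K, N] -> N rows of K columns
    const int K = 4, M = 3, N = 2;
    struct ggml_tensor * A = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, K, M);
    struct ggml_tensor * B = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, K, N);

    // C = ggml_mul_mat(ctx, A, B) has ne = [M, N], i.e. C = B A^T
    struct ggml_tensor * C = ggml_mul_mat(ctx, A, B);
    GGML_ASSERT(C->ne[0] == M && C->ne[1] == N);

    ggml_free(ctx);
    return 0;
}
```

  Equivalently, `ggml_mul_mat` requires `A->ne[0] == B->ne[0]` (the shared dimension) and produces a result with `ne[0] = A->ne[1]` and `ne[1] = B->ne[1]`.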
};
GGML_API struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer);
-GGML_API void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor);
+GGML_API enum ggml_status ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor);
// Graph allocator
/*
GGML_API void ggml_backend_buffer_free (ggml_backend_buffer_t buffer);
GGML_API void * ggml_backend_buffer_get_base (ggml_backend_buffer_t buffer);
GGML_API size_t ggml_backend_buffer_get_size (ggml_backend_buffer_t buffer);
- GGML_API void ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
+ GGML_API enum ggml_status ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
GGML_API size_t ggml_backend_buffer_get_max_size (ggml_backend_buffer_t buffer);
GGML_API size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data);
// Tensor initialization
- GGML_API void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
- GGML_API void ggml_backend_view_init(struct ggml_tensor * tensor);
+ GGML_API enum ggml_status ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
+ GGML_API enum ggml_status ggml_backend_view_init(struct ggml_tensor * tensor);
// CPU buffer types are always available
GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
return talloc;
}
-void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor) {
+enum ggml_status ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor) {
size_t size = ggml_backend_buffer_get_alloc_size(talloc->buffer, tensor);
size = GGML_PAD(size, talloc->alignment);
assert(((uintptr_t)addr % talloc->alignment) == 0);
- ggml_backend_tensor_alloc(talloc->buffer, tensor, addr);
+ return ggml_backend_tensor_alloc(talloc->buffer, tensor, addr);
}
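// Illustration only (not part of this change): a minimal, self-contained sketch of how a
// caller could check the new ggml_status return value of ggml_tallocr_alloc.
// Assumes only the public ggml.h / ggml-alloc.h / ggml-backend.h APIs and the CPU buffer type.
#include <stdio.h>
#include <ggml.h>
#include <ggml-alloc.h>
#include <ggml-backend.h>

int main(void) {
    struct ggml_init_params params = {
        /* .mem_size   = */ ggml_tensor_overhead() * 8,
        /* .mem_buffer = */ NULL,
        /* .no_alloc   = */ true, // tensor data is placed by the allocator below
    };
    struct ggml_context * ctx = ggml_init(params);

    ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer(ggml_backend_cpu_buffer_type(), 1024*1024);
    struct ggml_tallocr tallocr = ggml_tallocr_new(buffer);

    struct ggml_tensor * t = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1024);
    enum ggml_status status = ggml_tallocr_alloc(&tallocr, t);
    if (status != GGML_STATUS_SUCCESS) {
        fprintf(stderr, "ggml_tallocr_alloc failed: %s\n", ggml_status_to_string(status));
    }

    ggml_backend_buffer_free(buffer);
    ggml_free(ctx);
    return 0;
}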
// dynamic tensor allocator
// utils
+static void free_buffers(ggml_backend_buffer_t ** buffers, const size_t * n_buffers) {
+ for (size_t i = 0; i < *n_buffers; i++) {
+ ggml_backend_buffer_free((*buffers)[i]);
+ }
+ free(*buffers);
+}
+
static bool alloc_tensor_range(struct ggml_context * ctx,
struct ggml_tensor * first, struct ggml_tensor * last,
ggml_backend_buffer_type_t buft, size_t size,
ggml_backend_buffer_t ** buffers, size_t * n_buffers) {
+
ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer(buft, size);
if (buffer == NULL) {
-#ifndef NDEBUG
- GGML_LOG_DEBUG("%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(buft), size);
-#endif
- for (size_t i = 0; i < *n_buffers; i++) {
- ggml_backend_buffer_free((*buffers)[i]);
- }
- free(*buffers);
+ GGML_LOG_ERROR("%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(buft), size);
+ free_buffers(buffers, n_buffers);
return false;
}
+ *buffers = realloc(*buffers, sizeof(ggml_backend_buffer_t) * (*n_buffers + 1));
+ (*buffers)[(*n_buffers)++] = buffer;
+
struct ggml_tallocr tallocr = ggml_tallocr_new(buffer);
for (struct ggml_tensor * t = first; t != last; t = ggml_get_next_tensor(ctx, t)) {
+ enum ggml_status status = GGML_STATUS_SUCCESS;
if (t->data == NULL) {
if (t->view_src == NULL) {
- ggml_tallocr_alloc(&tallocr, t);
+ status = ggml_tallocr_alloc(&tallocr, t);
} else if (t->buffer == NULL) {
- ggml_backend_view_init(t);
+ status = ggml_backend_view_init(t);
}
} else {
if (t->view_src != NULL && t->buffer == NULL) {
// view of a pre-allocated tensor
- ggml_backend_view_init(t);
+ status = ggml_backend_view_init(t);
}
}
+ if (status != GGML_STATUS_SUCCESS) {
+ GGML_LOG_ERROR("%s: failed to initialize tensor %s\n", __func__, t->name);
+ free_buffers(buffers, n_buffers);
+ return false;
+ }
}
- *buffers = realloc(*buffers, sizeof(ggml_backend_buffer_t) * (*n_buffers + 1));
- (*buffers)[(*n_buffers)++] = buffer;
-
return true;
}
// base address of the buffer
void * (*get_base) (ggml_backend_buffer_t buffer);
// (optional) initialize a tensor in the buffer (eg. add tensor extras)
- void (*init_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
+ enum ggml_status (*init_tensor)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
// tensor data access
void (*memset_tensor)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size);
void (*set_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
return base;
}
-void ggml_backend_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
+enum ggml_status ggml_backend_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
// init_tensor is optional
if (buffer->iface.init_tensor) {
- buffer->iface.init_tensor(buffer, tensor);
+ return buffer->iface.init_tensor(buffer, tensor);
}
+ return GGML_STATUS_SUCCESS;
}
void ggml_backend_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
// utils
-void ggml_backend_view_init(struct ggml_tensor * tensor) {
+enum ggml_status ggml_backend_view_init(struct ggml_tensor * tensor) {
GGML_ASSERT(tensor->buffer == NULL);
GGML_ASSERT(tensor->view_src != NULL);
GGML_ASSERT(tensor->view_src->buffer != NULL);
tensor->buffer = tensor->view_src->buffer;
tensor->data = (char *)tensor->view_src->data + tensor->view_offs;
- ggml_backend_buffer_init_tensor(tensor->buffer, tensor);
+ return ggml_backend_buffer_init_tensor(tensor->buffer, tensor);
}
-void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr) {
+enum ggml_status ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr) {
GGML_ASSERT(tensor->buffer == NULL);
GGML_ASSERT(tensor->data == NULL);
GGML_ASSERT(tensor->view_src == NULL);
tensor->buffer = buffer;
tensor->data = addr;
- ggml_backend_buffer_init_tensor(buffer, tensor);
+ return ggml_backend_buffer_init_tensor(buffer, tensor);
}
static struct ggml_tensor * graph_copy_dup_tensor(struct ggml_hash_set hash_set, struct ggml_tensor ** node_copies,
struct ggml_tensor * dst = node_copies[id];
if (dst->view_src != NULL) {
graph_copy_init_tensor(hash_set, node_copies, node_init, src->view_src);
- ggml_backend_view_init(dst);
+ enum ggml_status status = ggml_backend_view_init(dst);
+ GGML_ASSERT(status == GGML_STATUS_SUCCESS);
}
else {
ggml_backend_tensor_copy(src, dst);
assert(g1->n_nodes == g2->n_nodes);
for (int i = 0; i < g1->n_nodes; i++) {
- //printf("eval %d/%d\n", i, g1->n_nodes);
struct ggml_tensor * t1 = g1->nodes[i];
struct ggml_tensor * t2 = g2->nodes[i];
* @param buffer The CANN buffer from which to initialize the tensor.
* @param tensor Pointer to the tensor to be initialized.
*/
-static void ggml_backend_cann_buffer_init_tensor(
+static enum ggml_status ggml_backend_cann_buffer_init_tensor(
ggml_backend_buffer_t buffer, ggml_tensor* tensor) {
if (tensor->view_src != NULL && tensor->view_offs == 0) {
GGML_ASSERT(tensor->view_src->buffer->buft == buffer->buft);
- return;
+ return GGML_STATUS_SUCCESS;
}
// TODO: CANN backend doesn't support quantized yet. Just leave the code
memset_size, 0, memset_size));
}
}
+ return GGML_STATUS_SUCCESS;
}
// TODO: need to handle tensors which have padding.
return (void *) (buffer->context);
}
-static void ggml_backend_amx_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
+static enum ggml_status ggml_backend_amx_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
tensor->extra = (void *) ggml::cpu::amx::get_tensor_traits(buffer, tensor);
GGML_UNUSED(buffer);
+ return GGML_STATUS_SUCCESS;
}
static void ggml_backend_amx_buffer_memset_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor,
return nullptr;
}
-static void ggml_backend_cpu_aarch64_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
+static enum ggml_status ggml_backend_cpu_aarch64_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
tensor->extra = (void *) const_cast<ggml::cpu::tensor_traits *>(ggml_aarch64_get_optimal_repack_type(tensor));
GGML_UNUSED(buffer);
+ return GGML_STATUS_SUCCESS;
}
static void ggml_backend_cpu_aarch64_buffer_set_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor,
return ctx->dev_ptr;
}
-static void ggml_backend_cuda_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
+static enum ggml_status ggml_backend_cuda_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
ggml_backend_cuda_buffer_context * ctx = (ggml_backend_cuda_buffer_context *)buffer->context;
if (tensor->view_src != NULL) {
assert(tensor->view_src->buffer->buft == buffer->buft);
- return;
+ return GGML_STATUS_SUCCESS;
}
if (ggml_is_quantized(tensor->type) && tensor->view_src == nullptr && ggml_backend_buffer_get_usage(buffer) != GGML_BACKEND_BUFFER_USAGE_COMPUTE) {
CUDA_CHECK(cudaMemset((char *)tensor->data + original_size, 0, padded_size - original_size));
}
}
+ return GGML_STATUS_SUCCESS;
}
static void ggml_backend_cuda_buffer_memset_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) {
GGML_UNUSED(buffer);
}
-static void ggml_backend_cuda_split_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
+static enum ggml_status ggml_backend_cuda_split_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
GGML_ASSERT(tensor->view_src == nullptr); // views of split tensors are not supported
ggml_backend_cuda_split_buffer_context * ctx = (ggml_backend_cuda_split_buffer_context *)buffer->context;
}
}
tensor->extra = extra;
+ return GGML_STATUS_SUCCESS;
}
static void ggml_backend_cuda_split_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
GGML_UNUSED(buffer);
}
-static void ggml_backend_opencl_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
+static enum ggml_status ggml_backend_opencl_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
ggml_backend_opencl_buffer_context * ctx = (ggml_backend_opencl_buffer_context *) buffer->context;
ggml_cl2_init(buffer->buft->device);
tensor->extra = extra;
}
}
+ return GGML_STATUS_SUCCESS;
}
// The optimized gemm and gemv kernels are used for large matrices without batch.
return result;
}
-static void ggml_backend_rpc_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
+static enum ggml_status ggml_backend_rpc_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
ggml_backend_rpc_buffer_context * ctx = (ggml_backend_rpc_buffer_context *)buffer->context;
// CUDA backend on the server pads everything to 512 due to CUDA limitations.
bool status = send_rpc_cmd(ctx->sock, RPC_CMD_INIT_TENSOR, &request, sizeof(request), nullptr, 0);
GGML_ASSERT(status);
}
+ return GGML_STATUS_SUCCESS;
}
static void ggml_backend_rpc_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
return ctx->dev_ptr;
}
-static void
+static enum ggml_status
ggml_backend_sycl_buffer_init_tensor(ggml_backend_buffer_t buffer,
ggml_tensor *tensor) try {
ggml_backend_sycl_buffer_context * ctx = (ggml_backend_sycl_buffer_context *)buffer->context;
if (tensor->view_src != NULL) {
assert(tensor->view_src->buffer->buft == buffer->buft);
- return;
+ return GGML_STATUS_SUCCESS;
}
ggml_tensor_extra_gpu * extra = new ggml_tensor_extra_gpu{};
padded_size - original_size).wait()));
}
}
+ return GGML_STATUS_SUCCESS;
}
catch (sycl::exception const &exc) {
std::cerr << exc.what() << "Exception caught at file:" << __FILE__
GGML_UNUSED(buffer);
}
-static void
+static enum ggml_status
ggml_backend_sycl_split_buffer_init_tensor(ggml_backend_buffer_t buffer,
ggml_tensor *tensor) try {
GGML_ASSERT(tensor->view_src == nullptr); // views of split tensors are not supported
}
}
tensor->extra = extra;
+ return GGML_STATUS_SUCCESS;
}
catch (sycl::exception const &exc) {
std::cerr << exc.what() << "Exception caught at file:" << __FILE__
UNUSED(buffer);
}
-static void ggml_backend_vk_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
+static enum ggml_status ggml_backend_vk_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
VK_LOG_DEBUG("ggml_backend_vk_buffer_init_tensor(" << buffer << " (" << buffer->context << "), " << tensor << ")");
if (tensor->view_src != nullptr) {
GGML_ASSERT(tensor->view_src->buffer->buft == buffer->buft);
}
+ return GGML_STATUS_SUCCESS;
}
static void ggml_backend_vk_buffer_memset_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) {
#include <ggml.h>
#include <ggml-alloc.h>
#include <ggml-backend.h>
+#include <ggml-cpp.h>
#include <algorithm>
#include <array>
// allocate
ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors(ctx, backend1);
+
if (buf == NULL) {
printf("failed to allocate tensors [%s] ", ggml_backend_name(backend1));
ggml_free(ctx);
/* .mem_base = */ NULL,
/* .no_alloc = */ true,
};
- ggml_context * ctx = ggml_init(params);
+ ggml_context_ptr ctx(ggml_init(params)); // smart ptr
GGML_ASSERT(ctx);
- ggml_tensor * out = build_graph(ctx);
+ ggml_tensor * out = build_graph(ctx.get());
if (op_name != nullptr && op_desc(out) != op_name) {
//printf(" %s: skipping\n", op_desc(out).c_str());
- ggml_free(ctx);
return true;
}
// check if backends support op
if (!ggml_backend_supports_op(backend, out)) {
printf("not supported\n");
- ggml_free(ctx);
return true;
}
printf("%*s", last - len, "");
// allocate
- ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors(ctx, backend);
+ ggml_backend_buffer_ptr buf(ggml_backend_alloc_ctx_tensors(ctx.get(), backend)); // smart ptr
+
if (buf == NULL) {
printf("failed to allocate tensors\n");
- ggml_free(ctx);
return false;
}
// randomize tensors
- initialize_tensors(ctx);
+ initialize_tensors(ctx.get());
// build graph
- ggml_cgraph * gf = ggml_new_graph_custom(ctx, graph_nodes, false);
+ ggml_cgraph * gf = ggml_new_graph_custom(ctx.get(), graph_nodes, false);
ggml_build_forward_expand(gf, out);
// warmup run
- ggml_backend_graph_compute(backend, gf);
+ ggml_status status = ggml_backend_graph_compute(backend, gf);
+ if (status != GGML_STATUS_SUCCESS) {
+ fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__, ggml_status_to_string(status));
+ return false;
+ }
// determine number of runs
int n_runs;
int total_runs = 0;
do {
int64_t start_time = ggml_time_us();
- ggml_backend_graph_compute(backend, gf);
+ ggml_status status = ggml_backend_graph_compute(backend, gf);
+ if (status != GGML_STATUS_SUCCESS) {
+ fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__, ggml_status_to_string(status));
+ return false;
+ }
int64_t end_time = ggml_time_us();
total_time_us += end_time - start_time;
}
printf("\n");
- ggml_backend_buffer_free(buf);
-
- ggml_free(ctx);
-
return true;
}
/* .mem_base = */ NULL,
/* .no_alloc = */ true,
};
- ggml_context * ctx = ggml_init(params);
+ ggml_context_ptr ctx(ggml_init(params)); // smart ptr
GGML_ASSERT(ctx);
- gf = ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, true);
- gb = ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, true);
+ gf = ggml_new_graph_custom(ctx.get(), GGML_DEFAULT_GRAPH_SIZE, true);
+ gb = ggml_new_graph_custom(ctx.get(), GGML_DEFAULT_GRAPH_SIZE, true);
- ggml_tensor * out = build_graph(ctx);
+ ggml_tensor * out = build_graph(ctx.get());
if ((op_name != nullptr && op_desc(out) != op_name) || out->op == GGML_OP_OPT_STEP_ADAMW) {
//printf(" %s: skipping\n", op_desc(out).c_str());
- ggml_free(ctx);
return true;
}
fflush(stdout);
if (out->type != GGML_TYPE_F32) {
- ggml_free(ctx);
printf("not supported [%s->type != FP32]\n", out->name);
return true;
}
// check if the backend supports the ops
bool supported = true;
bool any_params = false;
- for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+ for (ggml_tensor * t = ggml_get_first_tensor(ctx.get()); t != NULL; t = ggml_get_next_tensor(ctx.get(), t)) {
if (!ggml_backend_supports_op(backend, t)) {
printf("not supported [%s] ", ggml_backend_name(backend));
supported = false;
}
if (!supported) {
printf("\n");
- ggml_free(ctx);
return true;
}
int64_t ngrads = 0;
- for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+ for (ggml_tensor * t = ggml_get_first_tensor(ctx.get()); t != NULL; t = ggml_get_next_tensor(ctx.get(), t)) {
if (t->flags & GGML_TENSOR_FLAG_PARAM) {
ngrads += ggml_nelements(t);
}
}
if (ngrads > grad_nmax()) {
printf("skipping large tensors for speed \n");
- ggml_free(ctx);
return true;
}
if (!ggml_is_scalar(out)) {
- out = ggml_sum(ctx, out);
+ out = ggml_sum(ctx.get(), out);
ggml_set_name(out, "sum_of_out");
}
ggml_set_loss(out);
ggml_build_forward_expand(gf, out);
ggml_graph_cpy(gf, gb);
- ggml_build_backward_expand(ctx, ctx, gb, false);
+ ggml_build_backward_expand(ctx.get(), ctx.get(), gb, false);
if (expect.size() != 1 || expect[0] != 0.0f) {
GGML_ASSERT(ggml_graph_n_nodes(gb) > ggml_graph_n_nodes(gf));
- for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+ for (ggml_tensor * t = ggml_get_first_tensor(ctx.get()); t != NULL; t = ggml_get_next_tensor(ctx.get(), t)) {
GGML_ASSERT(!(t->flags & GGML_TENSOR_FLAG_PARAM) || ggml_graph_get_grad(gb, t)->op != GGML_OP_NONE);
}
}
- for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+ for (ggml_tensor * t = ggml_get_first_tensor(ctx.get()); t != NULL; t = ggml_get_next_tensor(ctx.get(), t)) {
if (!ggml_backend_supports_op(backend, t)) {
printf("not supported [%s] ", ggml_backend_name(backend));
supported = false;
}
if (!supported) {
printf("\n");
- ggml_free(ctx);
return true;
}
// allocate
- ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors(ctx, backend);
+ ggml_backend_buffer_ptr buf(ggml_backend_alloc_ctx_tensors(ctx.get(), backend)); // smart ptr
if (buf == NULL) {
printf("failed to allocate tensors [%s] ", ggml_backend_name(backend));
- ggml_free(ctx);
return false;
}
-
- initialize_tensors(ctx); // Randomizes all tensors (including gradients).
+ initialize_tensors(ctx.get()); // Randomizes all tensors (including gradients).
ggml_graph_reset(gb); // Sets gradients to 1 if loss, 0 otherwise.
- ggml_backend_graph_compute(backend, gf);
- ggml_backend_graph_compute(backend, gb);
+ ggml_status status = ggml_backend_graph_compute(backend, gf);
+ if (status != GGML_STATUS_SUCCESS) {
+ fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__, ggml_status_to_string(status));
+ return false;
+ }
+ status = ggml_backend_graph_compute(backend, gb);
+ if (status != GGML_STATUS_SUCCESS) {
+ fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__, ggml_status_to_string(status));
+ return false;
+ }
bool ok = true;
- for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != nullptr; t = ggml_get_next_tensor(ctx, t)) {
+ for (struct ggml_tensor * t = ggml_get_first_tensor(ctx.get()); t != nullptr; t = ggml_get_next_tensor(ctx.get(), t)) {
if (!(t->flags & GGML_TENSOR_FLAG_PARAM)) {
continue;
}
float fu, fuh, fdh, fd; // output values for xiu, xiuh, xid, xidh
ggml_backend_tensor_set(t, &xiu, i*sizeof(float), sizeof(float));
- ggml_backend_graph_compute(backend, gf);
+ status = ggml_backend_graph_compute(backend, gf);
+ if (status != GGML_STATUS_SUCCESS) {
+ fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__, ggml_status_to_string(status));
+ return false;
+ }
ggml_backend_tensor_get(out, &fu, 0, ggml_nbytes(out));
ggml_backend_tensor_set(t, &xid, i*sizeof(float), sizeof(float));
- ggml_backend_graph_compute(backend, gf);
+ status = ggml_backend_graph_compute(backend, gf);
+ if (status != GGML_STATUS_SUCCESS) {
+ fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__, ggml_status_to_string(status));
+ return false;
+ }
ggml_backend_tensor_get(out, &fd, 0, ggml_nbytes(out));
if (grad_precise()) {
ggml_backend_tensor_set(t, &xiuh, i*sizeof(float), sizeof(float));
- ggml_backend_graph_compute(backend, gf);
+ status = ggml_backend_graph_compute(backend, gf);
+ if (status != GGML_STATUS_SUCCESS) {
+ fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__, ggml_status_to_string(status));
+ return false;
+ }
ggml_backend_tensor_get(out, &fuh, 0, ggml_nbytes(out));
ggml_backend_tensor_set(t, &xidh, i*sizeof(float), sizeof(float));
- ggml_backend_graph_compute(backend, gf);
+ status = ggml_backend_graph_compute(backend, gf);
+ if (status != GGML_STATUS_SUCCESS) {
+ fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__, ggml_status_to_string(status));
+ return false;
+ }
ggml_backend_tensor_get(out, &fdh, 0, ggml_nbytes(out));
gn[i] = (8.0*(double)fuh + (double)fd - (8.0*(double)fdh + (double)fu)) / (6.0*(double)eps);
printf("compare failed ");
}
- ggml_backend_buffer_free(buf);
-
- ggml_free(ctx);
-
if (ok) {
printf("\033[1;32mOK\033[0m\n");
return true;