void (*graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
// compute graph without a plan
- void (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);
+ bool (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);
// check if the backend supports an operation
bool (*supports_op)(ggml_backend_t backend, const struct ggml_tensor * op);
ggml_backend_synchronize(backend);
}
-void ggml_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
- backend->iface.graph_compute(backend, cgraph);
+bool ggml_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+ if (!backend->iface.graph_compute(backend, cgraph)) {
+ return false;
+ }
// TODO: optional sync
ggml_backend_synchronize(backend);
+ return true;
}
bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
GGML_UNUSED(backend);
}
-static void ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+static bool ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context;
struct ggml_cplan cplan = ggml_graph_plan(cgraph, cpu_ctx->n_threads);
cplan.work_data = cpu_ctx->work_data;
-    ggml_graph_compute(cgraph, &cplan);
+    // ggml_graph_compute() returns an int exit code where GGML_EXIT_SUCCESS is 0;
+    // map it onto the new bool convention instead of returning true unconditionally
+    return ggml_graph_compute(cgraph, &cplan) == GGML_EXIT_SUCCESS;
}
static bool ggml_backend_cpu_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
GGML_API void ggml_backend_graph_plan_free (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
GGML_API void ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
- GGML_API void ggml_backend_graph_compute (ggml_backend_t backend, struct ggml_cgraph * cgraph);
+ GGML_API bool ggml_backend_graph_compute (ggml_backend_t backend, struct ggml_cgraph * cgraph);
GGML_API bool ggml_backend_supports_op (ggml_backend_t backend, const struct ggml_tensor * op);
// tensor copy between different backends
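With the public entry point now returning bool, callers can detect a failed graph computation instead of dying on an internal assert. A minimal sketch of caller-side handling (the wrapper name and surrounding code are illustrative, not part of this diff):

    #include <stdio.h>
    #include "ggml-backend.h"

    // illustrative wrapper: run a graph and report failure to the caller
    static bool compute_or_fail(ggml_backend_t backend, struct ggml_cgraph * gf) {
        if (!ggml_backend_graph_compute(backend, gf)) {
            fprintf(stderr, "%s: ggml_backend_graph_compute() failed\n", __func__);
            return false;
        }
        return true;
    }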
UNUSED(plan);
}
-static void ggml_backend_cuda_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
+static bool ggml_backend_cuda_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
ggml_backend_context_cuda * cuda_ctx = (ggml_backend_context_cuda *)backend->context;
ggml_cuda_set_main_device(cuda_ctx->device);
}
UNUSED(backend);
+
+ return true;
}
static bool ggml_backend_cuda_supports_op(ggml_backend_t backend, const ggml_tensor * op) {
// same as ggml_graph_compute but uses Metal
// creates gf->n_threads command buffers in parallel
-void ggml_metal_graph_compute(struct ggml_metal_context * ctx, struct ggml_cgraph * gf);
+bool ggml_metal_graph_compute(struct ggml_metal_context * ctx, struct ggml_cgraph * gf);
//
// backend API
return false;
}
}
-void ggml_metal_graph_compute(
+bool ggml_metal_graph_compute(
struct ggml_metal_context * ctx,
struct ggml_cgraph * gf) {
@autoreleasepool {
MTLCommandBufferStatus status = (MTLCommandBufferStatus) [ctx->command_buffers[i] status];
if (status != MTLCommandBufferStatusCompleted) {
GGML_METAL_LOG_INFO("%s: command buffer %d failed with status %lu\n", __func__, i, status);
- GGML_ASSERT(false);
+ return false;
}
}
+ return true;
}
}
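Every backend that implements the graph_compute callback follows the same contract after this change: return false as soon as something fails, true once the whole graph has been computed. A sketch of a conforming implementation for a hypothetical backend (example_compute_node is illustrative, not a real ggml function):

    // hypothetical backend illustrating the new graph_compute contract
    static bool ggml_backend_example_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
        for (int i = 0; i < cgraph->n_nodes; i++) {
            if (!example_compute_node(backend, cgraph->nodes[i])) {
                // propagate the failure instead of asserting
                return false;
            }
        }
        return true;
    }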
UNUSED(backend);
}
-static void ggml_backend_metal_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+static bool ggml_backend_metal_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
struct ggml_metal_context * metal_ctx = (struct ggml_metal_context *)backend->context;
- ggml_metal_graph_compute(metal_ctx, cgraph);
+ return ggml_metal_graph_compute(metal_ctx, cgraph);
}
static bool ggml_backend_metal_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
// ggml helpers
//
-static void ggml_graph_compute_helper(
+static bool ggml_graph_compute_helper(
struct ggml_cgraph * graph,
std::vector<uint8_t> & buf,
int n_threads,
plan.work_data = buf.data();
}
- ggml_graph_compute(graph, &plan);
+    // ggml_graph_compute() returns an int exit code where GGML_EXIT_SUCCESS is 0;
+    // compare explicitly, otherwise a successful run would convert to false
+    return ggml_graph_compute(graph, &plan) == GGML_EXIT_SUCCESS;
}
-static void ggml_graph_compute_helper(
+static bool ggml_graph_compute_helper(
struct ggml_backend * backend,
struct ggml_cgraph * graph,
int n_threads) {
ggml_backend_metal_set_n_cb(backend, n_threads);
}
#endif
- ggml_backend_graph_compute(backend, graph);
+ return ggml_backend_graph_compute(backend, graph);
}
// faster matrix multiplications for tensors that do not have dimension 0 divisible by "pad"
ggml_allocr_alloc_graph(alloc, gf);
if (!whisper_encode_external(wstate)) {
- ggml_graph_compute_helper(wstate.backend, gf, n_threads);
+ if (!ggml_graph_compute_helper(wstate.backend, gf, n_threads)) {
+ return false;
+ }
}
}
ggml_allocr_alloc_graph(alloc, gf);
- ggml_graph_compute_helper(wstate.backend, gf, n_threads);
+ if (!ggml_graph_compute_helper(wstate.backend, gf, n_threads)) {
+ return false;
+ }
}
// cross
ggml_allocr_alloc_graph(alloc, gf);
- ggml_graph_compute_helper(wstate.backend, gf, n_threads);
+ if (!ggml_graph_compute_helper(wstate.backend, gf, n_threads)) {
+ return false;
+ }
}
wstate.t_encode_us += ggml_time_us() - t_start_us;
logits = gf->nodes[gf->n_nodes - 1];
- ggml_graph_compute_helper(wstate.backend, gf, n_threads);
+ if (!ggml_graph_compute_helper(wstate.backend, gf, n_threads)) {
+ return false;
+ }
}
logits_out.resize(n_tokens*n_vocab);
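With the helpers returning bool, encode/decode failures now surface through whisper.cpp's existing return values instead of aborting the process. An illustrative check at the public API boundary (the surrounding variables are hypothetical):

    // whisper_full() returns 0 on success, non-zero on failure
    if (whisper_full(ctx, params, samples, n_samples) != 0) {
        fprintf(stderr, "failed to process audio\n");
        return 1;
    }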