#define GGML_MAX_NAME 48
#define GGML_DEFAULT_N_THREADS 4
+
+#define GGML_EXIT_SUCCESS 0 // status returned when the compute loop runs to completion
+#define GGML_EXIT_ABORTED 1 // status returned when the plan's abort_callback reports true
+
#define GGML_UNUSED(x) (void)(x)
+
#define GGML_ASSERT(x) \
do { \
if (!(x)) { \
// the `n_tasks` of nodes, 1:1 mapping to cgraph nodes
int n_tasks[GGML_MAX_NODES];
+
+ // optional (may be NULL): called with abort_callback_data during graph computation; the computation is aborted when it returns true
+ bool (*abort_callback)(void * data);
+ void * abort_callback_data;
};
// computation graph
// ggml_graph_plan() has to be called before ggml_graph_compute()
// when plan.work_size > 0, caller must allocate memory for plan.work_data
GGML_API struct ggml_cplan ggml_graph_plan (struct ggml_cgraph * cgraph, int n_threads /*= GGML_DEFAULT_N_THREADS*/);
- GGML_API void ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
+ GGML_API int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan); // returns the status reported by the calling thread's compute loop (presumably a GGML_EXIT_* code)
GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph);
// same as ggml_graph_compute() but the work data is allocated as a part of the context
#include <float.h>
#include <limits.h>
#include <stdarg.h>
+#include <signal.h>
#ifdef GGML_USE_METAL
#include <unistd.h>
// synchronization primitives
atomic_int n_active; // num active threads
atomic_int node_n; // active graph node
+
+ bool (*abort_callback)(void * data); // abort ggml_graph_compute when true
+ void * abort_callback_data;
};
struct ggml_compute_state {
int node_n = -1;
while (true) {
+ if (cplan->abort_callback && cplan->abort_callback(cplan->abort_callback_data)) {
+ state->shared->node_n += 1;
+ return GGML_EXIT_ABORTED;
+ }
if (atomic_fetch_sub(&state->shared->n_active, 1) == 1) {
// all other threads are finished and spinning
// do finalize and init here so we don't have synchronize again
} else {
break;
}
+
+ if (cplan->abort_callback && cplan->abort_callback(cplan->abort_callback_data)) {
+ break;
+ }
}
atomic_store(&state->shared->n_active, n_threads);
}
}
- return 0;
+ return GGML_EXIT_SUCCESS;
}
+// abort callback that never requests an abort; `data` is deliberately ignored
+static bool always_false(void * data) { (void) data; return false; }
struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
if (n_threads <= 0) {
n_threads = GGML_DEFAULT_N_THREADS;
return cplan;
}
-void ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
+int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
{
GGML_ASSERT(cplan);
GGML_ASSERT(cplan->n_threads > 0);
const int64_t perf_start_time_us = ggml_perf_time_us();
// this is a work thread too
- ggml_graph_compute_thread(&workers[0]);
+ int compute_status = ggml_graph_compute_thread(&workers[0]);
// don't leave affinity set on the main thread
clear_numa_thread_affinity();
- // join thread pool
+ // join or kill thread pool
if (n_threads > 1) {
for (int j = 1; j < n_threads; j++) {
const int rc = ggml_thread_join(workers[j].thrd, NULL);
(double) perf_time_us_cur / 1000.0,
(double) cgraph->perf_time_us / 1000.0 / cgraph->perf_runs);
}
+
+ return compute_status;
}
void ggml_graph_reset(struct ggml_cgraph * cgraph) {