struct ggml_cplan cplan;
memset(&cplan, 0, sizeof(struct ggml_cplan));
+ int max_tasks = 1;
+
// thread scheduling for the different operations + work buffer size estimation
for (int i = 0; i < cgraph->n_nodes; i++) {
struct ggml_tensor * node = cgraph->nodes[i];
const int n_tasks = ggml_get_n_tasks(node, n_threads);
+ max_tasks = MAX(max_tasks, n_tasks);
+
size_t cur = 0;
switch (node->op) {
work_size += CACHE_LINE_SIZE*(n_threads - 1);
}
- cplan.n_threads = n_threads;
+ cplan.n_threads = MIN(max_tasks, n_threads);
cplan.work_size = work_size;
cplan.work_data = NULL;