GGML_ASSERT(nb00 == sizeof(float));
if (nb10 == sizeof(float)) {
- for (int j = ith; j < n; j += nth) {
+ const int j0 = (n/nth)*ith;
+ const int j1 = ith == nth - 1 ? n : (n/nth)*(ith + 1);
+
+ for (int j = j0; j < j1; j++) {
ggml_vec_add_f32(nc,
(float *) ((char *) dst->data + j*nb1),
(float *) ((char *) src0->data + j*nb01),
} break;
case GGML_OP_ADD:
{
- node->n_tasks = 1;
+ node->n_tasks = n_threads;
} break;
case GGML_OP_SUB:
case GGML_OP_MUL: