* llama : fix buffer checks for mamba and rwk
* llama : fix missing worst case flag during reserve
* cuda : fix supports_op for norm
* disable sched SET_CAUSE
return -1;
}
-#if 1
+#if 0
#define GGML_SCHED_MAX_SPLITS_DEBUG 4096
static char causes[GGML_DEFAULT_GRAPH_SIZE*16 + GGML_SCHED_MAX_SPLITS_DEBUG*GGML_SCHED_MAX_SPLIT_INPUTS][128]; // debug only
#define SET_CAUSE(node, ...) sprintf(causes[hash_id(node)], __VA_ARGS__)
}
return false;
} break;
+ case GGML_OP_NORM:
+ case GGML_OP_RMS_NORM:
+ return ggml_is_contiguous(op->src[0]) && op->ne[0] % WARP_SIZE == 0;
+ break;
case GGML_OP_NONE:
case GGML_OP_RESHAPE:
case GGML_OP_VIEW:
case GGML_OP_PERMUTE:
case GGML_OP_TRANSPOSE:
- case GGML_OP_NORM:
case GGML_OP_ADD:
case GGML_OP_ADD1:
case GGML_OP_SUB:
case GGML_OP_MUL:
case GGML_OP_DIV:
- case GGML_OP_RMS_NORM:
case GGML_OP_SCALE:
case GGML_OP_SQR:
case GGML_OP_SQRT:
const int64_t n_s = sx->ne[2];
// TODO: maybe support other strides than 1?
+ // FIXME: this is always true?
GGML_ASSERT(sx->ne[0] == d_conv - 1 + n_t);
GGML_ASSERT(sx->ne[1] == d_inner);
GGML_ASSERT(n_t >= 0);