const int cc = ggml_cuda_info().devices[ggml_cuda_get_device()].cc;
use_mul_mat_vec_f = use_mul_mat_vec_f && ggml_cuda_should_use_mmvf(src0->type, cc, src0->ne, is_mul_mat_id ? src1->ne[2] : src1->ne[1]);
+ const bool split = ggml_backend_buft_is_cuda_split(src0->buffer->buft) ||
+ ggml_backend_buft_is_cuda_split(src1->buffer->buft);
+
+ //TODO: add support for fusion for split buffers
+ if (split) {
+ return false;
+ }
+
//we only support fusion for ncols_dst = 1
if (tensor->op == GGML_OP_MUL_MAT && dst->ne[1] != 1) {
return false;
return false;
}
+
+ const bool split = ggml_backend_buft_is_cuda_split(src0->buffer->buft) ||
+ ggml_backend_buft_is_cuda_split(src1->buffer->buft);
+
+ //TODO: add support for fusion for split buffers
+ if (split) {
+ return false;
+ }
+
return use_mul_mat_vec_q;
}