std::exit(1);
}
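+// Reports whether this SYCL device exposes Intel XMX matrix hardware (ext_intel_matrix aspect).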
+bool gpu_has_xmx(sycl::device &dev) {
+ return dev.has(sycl::aspect::ext_intel_matrix);
+}
+
int64_t downsample_sycl_global_range(int64_t accumulate_block_num, int64_t block_size) {
const int64_t max_range = std::numeric_limits<int>::max();
int64_t sycl_down_blk_size = block_size;
}
}
+bool gpu_has_xmx(sycl::device &dev);
void ggml_sycl_op_flatten(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
const ggml_tensor *src1, ggml_tensor *dst,
});
}
-void ggml_sycl_op_concat(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
- const ggml_tensor *src1, ggml_tensor *dst) {
+void ggml_sycl_op_concat(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
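+    // Operands are now read from dst->src[] instead of being passed as explicit parameters.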
+ const ggml_tensor *src0 = dst->src[0];
+ const ggml_tensor *src1 = dst->src[1];
queue_ptr stream = ctx.stream();
const int32_t dim = ((int32_t *)dst->op_params)[0];
#include "common.hpp"
-void ggml_sycl_op_concat(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
- const ggml_tensor *src1, ggml_tensor *dst);
+void ggml_sycl_op_concat(ggml_backend_sycl_context & ctx, ggml_tensor *dst);
#endif // GGML_SYCL_CONCAT_HPP
});
}
-void ggml_sycl_op_conv_transpose_1d(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
- const ggml_tensor *src1, ggml_tensor *dst) {
+void ggml_sycl_op_conv_transpose_1d(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
+ const ggml_tensor *src0 = dst->src[0];
+ const ggml_tensor *src1 = dst->src[1];
const float * src0_d = (const float *)src0->data;
const float * src1_d = (const float *)src1->data;
#include "common.hpp"
-void ggml_sycl_op_conv_transpose_1d(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
- const ggml_tensor *src1, ggml_tensor *dst);
+void ggml_sycl_op_conv_transpose_1d(ggml_backend_sycl_context & ctx, ggml_tensor *dst);
#endif // GGML_SYCL_CONV_HPP
}
-void ggml_sycl_sqrt(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_sqrt(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sqrt);
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sqrt);
GGML_SYCL_DEBUG("call %s done\n", __func__);
}
-void ggml_sycl_sin(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_sin(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sin);
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sin);
GGML_SYCL_DEBUG("call %s done\n", __func__);
}
-void ggml_sycl_cos(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_cos(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_cos);
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_cos);
GGML_SYCL_DEBUG("call %s done\n", __func__);
}
-void ggml_sycl_acc(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_acc(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_acc);
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_acc);
GGML_SYCL_DEBUG("call %s done\n", __func__);
}
-void ggml_sycl_gelu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_gelu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_gelu);
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_gelu);
GGML_SYCL_DEBUG("call %s done\n", __func__);
}
-void ggml_sycl_silu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_silu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_silu);
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_silu);
GGML_SYCL_DEBUG("call %s done\n", __func__);
}
-void ggml_sycl_gelu_quick(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_gelu_quick(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_gelu_quick);
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_gelu_quick);
GGML_SYCL_DEBUG("call %s done\n", __func__);
}
-void ggml_sycl_tanh(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_tanh(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_tanh);
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_tanh);
GGML_SYCL_DEBUG("call %s done\n", __func__);
}
-void ggml_sycl_relu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_relu);
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_relu);
GGML_SYCL_DEBUG("call %s done\n", __func__);
}
-void ggml_sycl_sigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_sigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sigmoid);
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sigmoid);
GGML_SYCL_DEBUG("call %s done\n", __func__);
}
-void ggml_sycl_hardsigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_hardsigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_hardsigmoid);
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_hardsigmoid);
GGML_SYCL_DEBUG("call %s done\n", __func__);
}
-void ggml_sycl_hardswish(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_hardswish(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_hardswish);
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_hardswish);
GGML_SYCL_DEBUG("call %s done\n", __func__);
}
-void ggml_sycl_exp(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_exp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_exp);
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_exp);
GGML_SYCL_DEBUG("call %s done\n", __func__);
}
-void ggml_sycl_log(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_log(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_log);
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_log);
GGML_SYCL_DEBUG("call %s done\n", __func__);
}
-void ggml_sycl_neg(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_neg(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_neg);
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_neg);
GGML_SYCL_DEBUG("call %s done\n", __func__);
}
-void ggml_sycl_step(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_step(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_step);
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_step);
GGML_SYCL_DEBUG("call %s done\n", __func__);
}
-void ggml_sycl_leaky_relu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_leaky_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_leaky_relu);
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_leaky_relu);
GGML_SYCL_DEBUG("call %s done\n", __func__);
}
-void ggml_sycl_sqr(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_sqr(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sqr);
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sqr);
GGML_SYCL_DEBUG("call %s done\n", __func__);
}
-void ggml_sycl_upscale(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_upscale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_upscale);
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_upscale);
GGML_SYCL_DEBUG("call %s done\n", __func__);
}
-void ggml_sycl_pad(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_pad(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_pad);
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_pad);
GGML_SYCL_DEBUG("call %s done\n", __func__);
}
-void ggml_sycl_add(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_add(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_add);
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_add);
GGML_SYCL_DEBUG("call %s done\n", __func__);
}
-void ggml_sycl_sub(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_sub(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sub);
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sub);
GGML_SYCL_DEBUG("call %s done\n", __func__);
}
-void ggml_sycl_mul(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_mul(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_mul);
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_mul);
GGML_SYCL_DEBUG("call %s done\n", __func__);
}
-void ggml_sycl_div(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_div(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_div);
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_div);
GGML_SYCL_DEBUG("call %s done\n", __func__);
}
}
-void ggml_sycl_sqrt(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
+void ggml_sycl_sqrt(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
-void ggml_sycl_sin(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
+void ggml_sycl_sin(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
-void ggml_sycl_cos(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
+void ggml_sycl_cos(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
-void ggml_sycl_acc(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
+void ggml_sycl_acc(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
-void ggml_sycl_gelu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
+void ggml_sycl_gelu(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
-void ggml_sycl_silu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
+void ggml_sycl_silu(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
-void ggml_sycl_gelu_quick(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
+void ggml_sycl_gelu_quick(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
-void ggml_sycl_tanh(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
+void ggml_sycl_tanh(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
-void ggml_sycl_relu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
+void ggml_sycl_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
-void ggml_sycl_sigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
+void ggml_sycl_sigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
-void ggml_sycl_hardsigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
+void ggml_sycl_hardsigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
-void ggml_sycl_hardswish(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
+void ggml_sycl_hardswish(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
-void ggml_sycl_exp(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
+void ggml_sycl_exp(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
-void ggml_sycl_log(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
+void ggml_sycl_log(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
-void ggml_sycl_neg(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
+void ggml_sycl_neg(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
-void ggml_sycl_step(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
+void ggml_sycl_step(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
-void ggml_sycl_leaky_relu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
+void ggml_sycl_leaky_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
-void ggml_sycl_sqr(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
+void ggml_sycl_sqr(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
-void ggml_sycl_upscale(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
+void ggml_sycl_upscale(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
-void ggml_sycl_pad(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
+void ggml_sycl_pad(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
-void ggml_sycl_add(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
+void ggml_sycl_add(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
-void ggml_sycl_sub(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
+void ggml_sycl_sub(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
-void ggml_sycl_mul(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
+void ggml_sycl_mul(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
-void ggml_sycl_div(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
+void ggml_sycl_div(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
#endif // GGML_SYCL_ELEMENTWISE_HPP
GGML_ASSERT(info.device_count <= GGML_SYCL_MAX_DEVICES);
int64_t total_vram = 0;
-#if defined(GGML_SYCL_FORCE_MMQ)
- GGML_LOG_INFO("%s: GGML_SYCL_FORCE_MMQ: yes\n", __func__);
-#else
- GGML_LOG_INFO("%s: GGML_SYCL_FORCE_MMQ: no\n", __func__);
-#endif
-#if defined(SYCL_USE_XMX)
- GGML_LOG_INFO("%s: SYCL_USE_XMX: yes\n", __func__);
-#else
- GGML_LOG_INFO("%s: SYCL_USE_XMX: no\n", __func__);
-#endif
- GGML_LOG_INFO("%s: found %d %s devices:\n", __func__, info.device_count, GGML_SYCL_NAME);
-
+/* The compile-time SYCL_USE_XMX report below is misleading: XMX is a per-device capability, now shown in the device table. Kept for a later rework. */
+// #if defined(SYCL_USE_XMX)
+// GGML_LOG_INFO("%s: SYCL_USE_XMX: yes\n", __func__);
+// #else
+// GGML_LOG_INFO("%s: SYCL_USE_XMX: no\n", __func__);
+// #endif
for (int i = 0; i < info.device_count; ++i) {
info.devices[i].vmm = 0;
dpct::device_info prop;
name = std::regex_replace(name, std::regex("\\(TM\\)"), "");
auto global_mem_size = prop.get_global_mem_size()/1000000;
-
- GGML_LOG_INFO("|%2d|%19s|%39s|%7s|%7d|%8d|%5d|%6luM|%21s|\n", id, device_type.c_str(),
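+        // Query per-device XMX support for the new "XMX or Tensor Cores" column.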
+ std::string xmx = gpu_has_xmx(device) ? "yes" : "no";
+ GGML_LOG_INFO("|%2d|%19s|%39s|%7s|%7d|%8d|%5d|%6luM|%21s|%14s|\n", id, device_type.c_str(),
name.c_str(), version.c_str(), prop.get_max_compute_units(),
prop.get_max_work_group_size(), prop.get_max_sub_group_size(),
- global_mem_size, device.get_info<sycl::info::device::driver_version>().c_str());
+ global_mem_size, device.get_info<sycl::info::device::driver_version>().c_str(), xmx.c_str());
}
void ggml_backend_sycl_print_sycl_devices() {
GGML_LOG_INFO(
"| | | | "
- " |Max | |Max |Global | |\n");
+ " |Max | |Max |Global | | XMX |\n");
GGML_LOG_INFO(
"| | | | "
- " |compute|Max work|sub |mem | |\n");
+ " |compute|Max work|sub |mem | | or |\n");
GGML_LOG_INFO(
"|ID| Device Type| "
- "Name|Version|units |group |group|size | Driver version|\n");
+ "Name|Version|units |group |group|size | Driver version| Tensor Cores |\n");
GGML_LOG_INFO(
"|--|-------------------|---------------------------------------|------"
- "-|-------|--------|-----|-------|---------------------|\n");
+ "-|-------|--------|-----|-------|---------------------|--------------|\n");
for (int id = 0; id < device_count; ++id) {
sycl::device device = dpct::dev_mgr::instance().get_device(id);
static bool initialized = false;
if (!initialized) {
- GGML_LOG_INFO("[SYCL] call ggml_check_sycl\n");
+ GGML_SYCL_DEBUG("[SYCL] call ggml_check_sycl\n");
g_ggml_sycl_debug = get_sycl_env("GGML_SYCL_DEBUG", 0);
- GGML_LOG_INFO("%s: GGML_SYCL_DEBUG: %d\n", __func__, g_ggml_sycl_debug);
-
+ GGML_LOG_INFO("GGML_SYCL_DEBUG: %d\n", g_ggml_sycl_debug);
+#if defined(GGML_SYCL_FORCE_MMQ)
+ GGML_LOG_INFO("GGML_SYCL_FORCE_MMQ: yes\n");
+#else
+ GGML_LOG_INFO("GGML_SYCL_FORCE_MMQ: no\n");
+#endif
#if defined(GGML_SYCL_F16)
- GGML_LOG_INFO("%s: GGML_SYCL_F16: yes\n", __func__);
+ GGML_LOG_INFO("GGML_SYCL_F16: yes\n");
#else
- GGML_LOG_INFO("%s: GGML_SYCL_F16: no\n", __func__);
+ GGML_LOG_INFO("GGML_SYCL_F16: no\n");
#endif
/* NOT REMOVE, keep it for next optimize for XMX.
/// kernels
typedef void (*cpy_kernel_t)(const char * cx, char * cdst);
-typedef void (*ggml_sycl_func_t)(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
typedef void (*ggml_sycl_op_mul_mat_t)(
ggml_backend_sycl_context & ctx,
const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst,
}
-static void ggml_sycl_repeat(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+static void ggml_sycl_repeat(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_repeat);
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_repeat);
GGML_SYCL_DEBUG("call %s done\n", __func__);
}
-static void ggml_sycl_get_rows(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+static void ggml_sycl_get_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_get_rows);
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_get_rows);
GGML_SYCL_DEBUG("call %s done\n", __func__);
}
-static void ggml_sycl_norm(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+static void ggml_sycl_norm(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_norm);
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_norm);
GGML_SYCL_DEBUG("call %s done\n", __func__);
}
-static void ggml_sycl_rms_norm(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+static void ggml_sycl_rms_norm(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_rms_norm);
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_rms_norm);
GGML_SYCL_DEBUG("call %s done\n", __func__);
}
-static void ggml_sycl_group_norm(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+static void ggml_sycl_group_norm(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
GGML_SYCL_DEBUG("call %s\n", __func__);
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_group_norm);
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_group_norm);
GGML_SYCL_DEBUG("call %s done\n", __func__);
}
}
}
-static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
- const ggml_tensor *src1,
+static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx,
ggml_tensor *dst) try {
+ const ggml_tensor *src0 = dst->src[0];
+ const ggml_tensor *src1 = dst->src[1];
GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(src0->buffer) && "mul_mat_id does not support split buffers");
const ggml_tensor *ids = dst->src[2];
std::exit(1);
}
-static void ggml_sycl_scale(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_scale);
+static void ggml_sycl_scale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_scale);
}
-static void ggml_sycl_clamp(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_clamp);
+static void ggml_sycl_clamp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_clamp);
}
static void ggml_sycl_cpy(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
ggml_type_name(src0->type), ggml_type_name(src1->type));
GGML_ABORT("fatal error");
}
-
GGML_UNUSED(dst);
}
catch (sycl::exception const &exc) {
std::exit(1);
}
-static void ggml_sycl_dup(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+static void ggml_sycl_dup(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
// TODO: why do we pass dst as src1 here?
- ggml_sycl_cpy(ctx, src0, dst, nullptr);
- GGML_UNUSED(src1);
+ ggml_sycl_cpy(ctx, dst->src[0], dst, nullptr);
}
-static void ggml_sycl_diag_mask_inf(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_diag_mask_inf);
+static void ggml_sycl_diag_mask_inf(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_diag_mask_inf);
}
-static void ggml_sycl_soft_max(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_soft_max);
+static void ggml_sycl_soft_max(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_soft_max);
}
-static void ggml_sycl_rope(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
- GGML_ASSERT(ggml_is_contiguous(src0)); // TODO: this restriction is temporary until non-cont support is implemented
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_rope);
+static void ggml_sycl_rope(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+ GGML_ASSERT(ggml_is_contiguous(dst->src[0])); // TODO: this restriction is temporary until non-cont support is implemented
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_rope);
}
-static void ggml_sycl_pool2d(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_pool2d);
+static void ggml_sycl_pool2d(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_pool2d);
}
-static void ggml_sycl_im2col(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_im2col);
+static void ggml_sycl_im2col(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_im2col);
}
-static void ggml_sycl_sum(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
- GGML_ASSERT(ggml_is_contiguous(src0));
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sum);
+static void ggml_sycl_sum(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+ GGML_ASSERT(ggml_is_contiguous(dst->src[0]));
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sum);
}
-static void ggml_sycl_sum_rows(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
- GGML_ASSERT(ggml_is_contiguous(src0));
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sum_rows);
+static void ggml_sycl_sum_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+ GGML_ASSERT(ggml_is_contiguous(dst->src[0]));
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sum_rows);
}
-static void ggml_sycl_argsort(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
- GGML_ASSERT(ggml_is_contiguous(src0));
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_argsort);
+static void ggml_sycl_argsort(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+ GGML_ASSERT(ggml_is_contiguous(dst->src[0]));
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_argsort);
}
-static void ggml_sycl_argmax(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
- GGML_ASSERT(ggml_is_contiguous(src0));
- ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_argmax);
+static void ggml_sycl_argmax(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+ GGML_ASSERT(ggml_is_contiguous(dst->src[0]));
+ ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_argmax);
}
-static void ggml_sycl_nop(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
- GGML_UNUSED(src0);
- GGML_UNUSED(src1);
- GGML_UNUSED(dst);
- GGML_UNUSED(ctx);
-}
void ggml_sycl_set_main_device(const int main_device) try {
if (dpct::get_current_device_id() == static_cast<unsigned int> (main_device)) {
std::exit(1);
}
-bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct ggml_tensor * tensor) {
+bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct ggml_tensor * dst) {
if (!g_sycl_loaded) return false;
- ggml_sycl_func_t func;
+ if (dst->src[0] != nullptr && ggml_backend_buffer_is_sycl_split(dst->src[0]->buffer)) {
+ ggml_sycl_set_peer_access(dst->src[1]->ne[1], ctx.device);
+ }
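+    // Dispatch is now direct: each case runs its op immediately instead of selecting a ggml_sycl_func_t and calling it afterwards.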
- switch (tensor->op) {
+ switch (dst->op) {
case GGML_OP_ARGMAX:
- func = ggml_sycl_argmax;
+ ggml_sycl_argmax(ctx, dst);
break;
case GGML_OP_CONV_TRANSPOSE_1D:
- func = ggml_sycl_op_conv_transpose_1d;
+ ggml_sycl_op_conv_transpose_1d(ctx, dst);
break;
case GGML_OP_REPEAT:
- func = ggml_sycl_repeat;
+ ggml_sycl_repeat(ctx, dst);
break;
case GGML_OP_GET_ROWS:
- func = ggml_sycl_get_rows;
+ ggml_sycl_get_rows(ctx, dst);
break;
case GGML_OP_DUP:
- func = ggml_sycl_dup;
+ ggml_sycl_dup(ctx, dst);
break;
case GGML_OP_ADD:
case GGML_OP_ADD1: // TODO: more efficient implementation
- func = ggml_sycl_add;
+ ggml_sycl_add(ctx, dst);
break;
case GGML_OP_SUB:
- func = ggml_sycl_sub;
+ ggml_sycl_sub(ctx, dst);
break;
case GGML_OP_ACC:
- func = ggml_sycl_acc;
+ ggml_sycl_acc(ctx, dst);
break;
case GGML_OP_MUL:
- func = ggml_sycl_mul;
+ ggml_sycl_mul(ctx, dst);
break;
case GGML_OP_LOG:
- func = ggml_sycl_log;
+ ggml_sycl_log(ctx, dst);
break;
case GGML_OP_DIV:
- func = ggml_sycl_div;
+ ggml_sycl_div(ctx, dst);
break;
case GGML_OP_UNARY:
- switch (ggml_get_unary_op(tensor)) {
+ switch (ggml_get_unary_op(dst)) {
case GGML_UNARY_OP_NEG:
- func = ggml_sycl_neg;
+ ggml_sycl_neg(ctx, dst);
break;
case GGML_UNARY_OP_STEP:
- func = ggml_sycl_step;
+ ggml_sycl_step(ctx, dst);
break;
case GGML_UNARY_OP_GELU:
- func = ggml_sycl_gelu;
+ ggml_sycl_gelu(ctx, dst);
break;
case GGML_UNARY_OP_SILU:
- func = ggml_sycl_silu;
+ ggml_sycl_silu(ctx, dst);
break;
case GGML_UNARY_OP_GELU_QUICK:
- func = ggml_sycl_gelu_quick;
+ ggml_sycl_gelu_quick(ctx, dst);
break;
case GGML_UNARY_OP_TANH:
- func = ggml_sycl_tanh;
+ ggml_sycl_tanh(ctx, dst);
break;
case GGML_UNARY_OP_RELU:
- func = ggml_sycl_relu;
+ ggml_sycl_relu(ctx, dst);
break;
case GGML_UNARY_OP_SIGMOID:
- func = ggml_sycl_sigmoid;
+ ggml_sycl_sigmoid(ctx, dst);
break;
case GGML_UNARY_OP_HARDSIGMOID:
- func = ggml_sycl_hardsigmoid;
+ ggml_sycl_hardsigmoid(ctx, dst);
break;
case GGML_UNARY_OP_HARDSWISH:
- func = ggml_sycl_hardswish;
+ ggml_sycl_hardswish(ctx, dst);
break;
case GGML_UNARY_OP_EXP:
- func = ggml_sycl_exp;
+ ggml_sycl_exp(ctx, dst);
break;
default:
return false;
}
break;
case GGML_OP_NORM:
- func = ggml_sycl_norm;
+ ggml_sycl_norm(ctx, dst);
break;
case GGML_OP_GROUP_NORM:
- func = ggml_sycl_group_norm;
+ ggml_sycl_group_norm(ctx, dst);
break;
case GGML_OP_CONCAT:
- func = ggml_sycl_op_concat;
+ ggml_sycl_op_concat(ctx, dst);
break;
case GGML_OP_UPSCALE:
- func = ggml_sycl_upscale;
+ ggml_sycl_upscale(ctx, dst);
break;
case GGML_OP_PAD:
- func = ggml_sycl_pad;
+ ggml_sycl_pad(ctx, dst);
break;
case GGML_OP_LEAKY_RELU:
- func = ggml_sycl_leaky_relu;
+ ggml_sycl_leaky_relu(ctx, dst);
break;
case GGML_OP_RMS_NORM:
- func = ggml_sycl_rms_norm;
+ ggml_sycl_rms_norm(ctx, dst);
break;
case GGML_OP_MUL_MAT:
- if (tensor->src[0]->ne[3] != tensor->src[1]->ne[3]) {
+ if (dst->src[0]->ne[3] != dst->src[1]->ne[3]) {
return false;
}
- func = ggml_sycl_mul_mat;
+            /* ggml_sycl_mul_mat keeps its explicit (src0, src1, dst) signature because ggml_sycl_mul_mat_id depends on it */
+ ggml_sycl_mul_mat(ctx, dst->src[0], dst->src[1], dst);
break;
case GGML_OP_MUL_MAT_ID:
- if (tensor->src[0]->ne[3] != tensor->src[1]->ne[3]) {
+ if (dst->src[0]->ne[3] != dst->src[1]->ne[3]) {
return false;
}
- func = ggml_sycl_mul_mat_id;
+ ggml_sycl_mul_mat_id(ctx, dst);
break;
case GGML_OP_OUT_PROD:
- func = ggml_sycl_op_out_prod;
+ ggml_sycl_op_out_prod(ctx, dst);
break;
case GGML_OP_SCALE:
- func = ggml_sycl_scale;
+ ggml_sycl_scale(ctx, dst);
break;
case GGML_OP_SQR:
- func = ggml_sycl_sqr;
+ ggml_sycl_sqr(ctx, dst);
break;
case GGML_OP_SQRT:
- func = ggml_sycl_sqrt;
+ ggml_sycl_sqrt(ctx, dst);
break;
case GGML_OP_SIN:
- func = ggml_sycl_sin;
+ ggml_sycl_sin(ctx, dst);
break;
case GGML_OP_COS:
- func = ggml_sycl_cos;
+ ggml_sycl_cos(ctx, dst);
break;
case GGML_OP_CLAMP:
- func = ggml_sycl_clamp;
+ ggml_sycl_clamp(ctx, dst);
break;
case GGML_OP_CPY:
- func = ggml_sycl_cpy;
+ ggml_sycl_cpy(ctx, dst->src[0], dst->src[1], dst);
break;
case GGML_OP_CONT:
- func = ggml_sycl_dup;
+ ggml_sycl_dup(ctx, dst);
break;
case GGML_OP_NONE:
case GGML_OP_RESHAPE:
case GGML_OP_VIEW:
case GGML_OP_PERMUTE:
case GGML_OP_TRANSPOSE:
- func = ggml_sycl_nop;
+ GGML_SYCL_DEBUG("%s: Tensor NO-OP\n", __func__);
break;
case GGML_OP_DIAG_MASK_INF:
- func = ggml_sycl_diag_mask_inf;
+ ggml_sycl_diag_mask_inf(ctx, dst);
break;
case GGML_OP_SOFT_MAX:
- func = ggml_sycl_soft_max;
+ ggml_sycl_soft_max(ctx, dst);
break;
case GGML_OP_ROPE:
- func = ggml_sycl_rope;
+ ggml_sycl_rope(ctx, dst);
break;
case GGML_OP_IM2COL:
- func = ggml_sycl_im2col;
+ ggml_sycl_im2col(ctx, dst);
break;
case GGML_OP_POOL_2D:
- func = ggml_sycl_pool2d;
+ ggml_sycl_pool2d(ctx, dst);
break;
case GGML_OP_SUM:
- func = ggml_sycl_sum;
+ ggml_sycl_sum(ctx, dst);
break;
case GGML_OP_SUM_ROWS:
- func = ggml_sycl_sum_rows;
+ ggml_sycl_sum_rows(ctx, dst);
break;
case GGML_OP_ARGSORT:
- func = ggml_sycl_argsort;
+ ggml_sycl_argsort(ctx, dst);
break;
case GGML_OP_TIMESTEP_EMBEDDING:
- func = ggml_sycl_op_timestep_embedding;
+ ggml_sycl_op_timestep_embedding(ctx, dst);
break;
case GGML_OP_RWKV_WKV6:
- func = ggml_sycl_op_rwkv_wkv6;
+ ggml_sycl_op_rwkv_wkv6(ctx, dst);
break;
default:
return false;
}
- if (tensor->src[0] != nullptr && ggml_backend_buffer_is_sycl_split(tensor->src[0]->buffer)) {
- ggml_sycl_set_peer_access(tensor->src[1]->ne[1], ctx.device);
- }
-
- func(ctx, tensor->src[0], tensor->src[1], tensor);
return true;
}
#include "outprod.hpp"
-void ggml_sycl_op_out_prod(ggml_backend_sycl_context& ctx, const ggml_tensor* src0,
- const ggml_tensor* src1, ggml_tensor* dst) {
-
+void ggml_sycl_op_out_prod(ggml_backend_sycl_context& ctx, ggml_tensor* dst) {
+ const ggml_tensor *src0 = dst->src[0];
+ const ggml_tensor *src1 = dst->src[1];
GGML_ASSERT(src0->type == GGML_TYPE_F32);
GGML_ASSERT(src1->type == GGML_TYPE_F32);
#include "common.hpp"
-void ggml_sycl_op_out_prod(ggml_backend_sycl_context& ctx, const ggml_tensor* src0,
- const ggml_tensor* src1, ggml_tensor* dst);
+void ggml_sycl_op_out_prod(ggml_backend_sycl_context& ctx, ggml_tensor* dst);
#endif // GGML_SYCL_OUTPROD_HPP
});
}
-void ggml_sycl_op_timestep_embedding(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
- const ggml_tensor *src1, ggml_tensor * dst) {
+void ggml_sycl_op_timestep_embedding(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+ const ggml_tensor *src0 = dst->src[0];
+ const ggml_tensor *src1 = dst->src[1];
const float * src0_d = (const float *)src0->data;
float * dst_d = (float *)dst->data;
dpct::queue_ptr stream = ctx.stream();
#include "common.hpp"
-void ggml_sycl_op_timestep_embedding(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
- const ggml_tensor *src1, ggml_tensor * dst);
+void ggml_sycl_op_timestep_embedding(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
#endif // GGML_SYCL_TSEMBD_HPP
}
}
-void ggml_sycl_op_rwkv_wkv6(ggml_backend_sycl_context& ctx, const ggml_tensor* src0,
- const ggml_tensor* src1, ggml_tensor* dst) {
+void ggml_sycl_op_rwkv_wkv6(ggml_backend_sycl_context& ctx, ggml_tensor* dst) {
+
+ const ggml_tensor *src0 = dst->src[0];
+ const ggml_tensor *src1 = dst->src[1];
const float* k_d = (const float*)dst->src[0]->data;
const float* v_d = (const float*)dst->src[1]->data;
#include "common.hpp"
-void ggml_sycl_op_rwkv_wkv6(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
- const ggml_tensor *src1, ggml_tensor * dst);
+void ggml_sycl_op_rwkv_wkv6(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
#endif // GGML_SYCL_WKV6_HPP