void ggml_sycl_add(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
- GGML_SYCL_DEBUG("call %s\n", __func__);
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/2);
ggml_sycl_op_add(ctx, dst);
- GGML_SYCL_DEBUG("call %s done\n", __func__);
}
void ggml_sycl_sub(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
- GGML_SYCL_DEBUG("call %s\n", __func__);
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/2);
ggml_sycl_op_sub(ctx, dst);
- GGML_SYCL_DEBUG("call %s done\n", __func__);
}
void ggml_sycl_mul(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
- GGML_SYCL_DEBUG("call %s\n", __func__);
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/2);
ggml_sycl_op_mul(ctx, dst);
- GGML_SYCL_DEBUG("call %s done\n", __func__);
}
void ggml_sycl_div(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
- GGML_SYCL_DEBUG("call %s\n", __func__);
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/2);
ggml_sycl_op_div(ctx, dst);
- GGML_SYCL_DEBUG("call %s done\n", __func__);
}
void ggml_sycl_repeat(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
- GGML_SYCL_DEBUG("call %s\n", __func__);
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
ggml_sycl_op_repeat(ctx, dst);
- GGML_SYCL_DEBUG("call %s done\n", __func__);
}
#include <fstream>
#include <iostream>
+#include <sstream>
+#include <string>
+#include <string_view>
#include "dpct/helper.hpp"
#include "ggml-sycl.h"
extern int g_ggml_sycl_disable_optimize;
extern int g_ggml_sycl_prioritize_dmmv;
-#define GGML_SYCL_DEBUG(...) \
- do { \
- if (g_ggml_sycl_debug) \
- fprintf(stderr, __VA_ARGS__); \
- } while (0)
+#if defined(__clang__) && __has_builtin(__builtin_expect)
+// Hint the optimizer which branch outcome is more likely so it can keep the common path straight-line
+# define LIKELY(expr) __builtin_expect(expr, true)
+# define UNLIKELY(expr) __builtin_expect(expr, false)
+#else
+# define LIKELY(expr) (expr)
+# define UNLIKELY(expr) (expr)
+#endif
+
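+// g_ggml_sycl_debug is expected to be disabled in normal runs; UNLIKELY keeps the fprintf branch off the common path.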
+#define GGML_SYCL_DEBUG(...) \
+ do { \
+ if (UNLIKELY(g_ggml_sycl_debug)) \
+ fprintf(stderr, __VA_ARGS__); \
+ } while (0)
#define CHECK_TRY_ERROR(expr) \
[&]() { \
}
bool gpu_has_xmx(sycl::device &dev);
+
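+// Prints "<prefix>=[a, b, ..., z]" through GGML_SYCL_DEBUG, e.g. debug_print_array<GGML_MAX_DIMS>(";ne", tensor->ne)
+// emits something like ";ne=[32, 1, 1, 1]" (illustrative values).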
+template <int N, class T> void debug_print_array(const std::string & prefix, const T array[N]) {
+ if (LIKELY(!g_ggml_sycl_debug)) {
+ return;
+ }
+ std::stringstream ss;
+ ss << prefix << "=[";
+    if constexpr (N > 0) {
+        for (int i = 0; i < N - 1; ++i) {
+            ss << array[i] << ", ";
+        }
+        ss << array[N - 1];
+    }
+ ss << "]";
+ GGML_SYCL_DEBUG("%s", ss.str().c_str());
+}
+
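+// Prints a one-line summary of a tensor, e.g. debug_print_tensor(" dst", t) emits something like
+// " dst='<name>':type=f32;ne=[...];nb=[...]", plus ";strided"/";permuted" markers when applicable (illustrative output).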
+inline void debug_print_tensor(const std::string & prefix, const ggml_tensor * tensor,
+ const std::string & suffix = "") {
+ if (LIKELY(!g_ggml_sycl_debug)) {
+ return;
+ }
+ GGML_SYCL_DEBUG("%s=", prefix.c_str());
+ if (tensor) {
+ GGML_SYCL_DEBUG("'%s':type=%s", tensor->name, ggml_type_name(tensor->type));
+ debug_print_array<GGML_MAX_DIMS>(";ne", tensor->ne);
+ debug_print_array<GGML_MAX_DIMS>(";nb", tensor->nb);
+ if (!ggml_is_contiguous(tensor)) {
+ GGML_SYCL_DEBUG(";strided");
+ }
+ if (ggml_is_permuted(tensor)) {
+ GGML_SYCL_DEBUG(";permuted");
+ }
+ } else {
+ GGML_SYCL_DEBUG("nullptr");
+ }
+ GGML_SYCL_DEBUG("%s", suffix.c_str());
+}
+
+// Use scope_op_debug_print to log the operations executed while running a model
+struct scope_op_debug_print {
+    // Use string_views to avoid the cost of creating and concatenating strings.
+    // The func and func_suffix string_views must remain alive for as long as this object is alive.
+    // In practice scope_op_debug_print is constructed from string literals, which live in constant
+    // storage and are therefore always accessible.
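+    // Typical use, as in the op handlers below: declare one at the top of the function, e.g.
+    //   scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/2);
+    // The constructor logs "[SYCL][OP] call <func>..." with dst and its sources; the destructor logs "... done".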
+ scope_op_debug_print(const std::string_view & func, const std::string_view & func_suffix, const ggml_tensor * dst,
+ std::size_t num_src, const std::string_view & suffix = "") :
+ func(func),
+ func_suffix(func_suffix) {
+ if (LIKELY(!g_ggml_sycl_debug)) {
+ return;
+ }
+ GGML_SYCL_DEBUG("[SYCL][OP] call %s%s:", func.data(), func_suffix.data());
+ debug_print_tensor(" dst", dst);
+ if (dst) {
+ for (std::size_t i = 0; i < num_src; ++i) {
+ debug_print_tensor("\tsrc" + std::to_string(i), dst->src[i]);
+ }
+ }
+ GGML_SYCL_DEBUG("%s\n", suffix.data());
+ }
+
+ scope_op_debug_print(const std::string_view & func, const ggml_tensor * dst, std::size_t num_src,
+ const std::string_view & suffix = "") :
+ scope_op_debug_print(func, "", dst, num_src, suffix) {}
+
+ ~scope_op_debug_print() { GGML_SYCL_DEBUG("[SYCL][OP] call %s%s done\n", func.data(), func_suffix.data()); }
+
+ private:
+ std::string_view func;
+ std::string_view func_suffix;
+};
+
#endif // GGML_SYCL_COMMON_HPP
}
void ggml_sycl_op_concat(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
- const ggml_tensor *src0 = dst->src[0];
- const ggml_tensor *src1 = dst->src[1];
- queue_ptr stream = ctx.stream();
-
- const int32_t dim = ((int32_t *)dst->op_params)[0];
-
- if (ggml_is_contiguous(src0) && ggml_is_contiguous(src1)) {
- const float *src0_d = (const float *)src0->data;
- const float *src1_d = (const float *)src1->data;
-
- float *dst_d = (float *)dst->data;
-
- if (dim != 3) {
- for (int i3 = 0; i3 < dst->ne[3]; i3++) {
- concat_f32_sycl(
- src0_d + i3 * (src0->nb[3] / 4), src1_d + i3 * (src1->nb[3] / 4),
- dst_d + i3 * (dst->nb[3] / 4), src0->ne[0], src0->ne[1],
- src0->ne[2], dst->ne[0], dst->ne[1], dst->ne[2], dim, stream);
- }
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/2);
+ const ggml_tensor * src0 = dst->src[0];
+ const ggml_tensor * src1 = dst->src[1];
+ queue_ptr stream = ctx.stream();
+
+ const int32_t dim = ((int32_t *) dst->op_params)[0];
+
+ if (ggml_is_contiguous(src0) && ggml_is_contiguous(src1)) {
+ const float * src0_d = (const float *) src0->data;
+ const float * src1_d = (const float *) src1->data;
+
+ float * dst_d = (float *) dst->data;
+
+ if (dim != 3) {
+ for (int i3 = 0; i3 < dst->ne[3]; i3++) {
+ concat_f32_sycl(src0_d + i3 * (src0->nb[3] / 4), src1_d + i3 * (src1->nb[3] / 4),
+ dst_d + i3 * (dst->nb[3] / 4), src0->ne[0], src0->ne[1], src0->ne[2], dst->ne[0],
+ dst->ne[1], dst->ne[2], dim, stream);
+ }
+ } else {
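+            // Concatenating contiguous tensors along dim 3 reduces to copying src0 then src1 back to back
+            // into dst (pointer offsets are in floats, hence the /4 on the byte size).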
+ const size_t size0 = ggml_nbytes(src0);
+ const size_t size1 = ggml_nbytes(src1);
+
+ SYCL_CHECK(CHECK_TRY_ERROR(stream->memcpy(dst_d, src0_d, size0).wait()));
+ SYCL_CHECK(CHECK_TRY_ERROR(stream->memcpy(dst_d + size0 / 4, src1_d, size1).wait()));
+ }
} else {
- const size_t size0 = ggml_nbytes(src0);
- const size_t size1 = ggml_nbytes(src1);
-
- SYCL_CHECK(CHECK_TRY_ERROR(stream->memcpy(dst_d, src0_d, size0).wait()));
- SYCL_CHECK(CHECK_TRY_ERROR(
- stream->memcpy(dst_d + size0 / 4, src1_d, size1).wait()));
+ concat_f32_sycl_non_cont(stream, (const char *) src0->data, (const char *) src1->data, (char *) dst->data,
+ src0->ne[0], src0->ne[1], src0->ne[2], src0->ne[3], src0->nb[0], src0->nb[1],
+ src0->nb[2], src0->nb[3], src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3],
+ src1->nb[0], src1->nb[1], src1->nb[2], src1->nb[3], dst->ne[0], dst->ne[1], dst->ne[2],
+ dst->ne[3], dst->nb[0], dst->nb[1], dst->nb[2], dst->nb[3], dim);
}
- } else
- concat_f32_sycl_non_cont(
- stream, (const char *)src0->data, (const char *)src1->data,
- (char *)dst->data, src0->ne[0], src0->ne[1], src0->ne[2], src0->ne[3],
- src0->nb[0], src0->nb[1], src0->nb[2], src0->nb[3], src1->ne[0],
- src1->ne[1], src1->ne[2], src1->ne[3], src1->nb[0], src1->nb[1],
- src1->nb[2], src1->nb[3], dst->ne[0], dst->ne[1], dst->ne[2],
- dst->ne[3], dst->nb[0], dst->nb[1], dst->nb[2], dst->nb[3], dim);
}
}
void ggml_sycl_op_conv_transpose_1d(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/2);
const ggml_tensor *src0 = dst->src[0];
const ggml_tensor *src1 = dst->src[1];
const float * src0_d = (const float *)src0->data;
}
void ggml_sycl_cpy(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1) try {
+    // Unlike other operators, ggml_sycl_cpy takes two distinct tensors instead of a dst ggml_tensor whose src fields hold the inputs
+ scope_op_debug_print scope_dbg_print(__func__, src1, /*num_src=*/0,
+ std::string(" src0 type=") + ggml_type_name(src0->type));
const int64_t ne = ggml_nelements(src0);
GGML_ASSERT(ne == ggml_nelements(src1));
char * src0_ddc = (char *) src0->data;
char * src1_ddc = (char *) src1->data;
- GGML_SYCL_DEBUG("[SYCL] %s: Tensor supplied: %s to %s\n", __func__, ggml_type_name(src0->type),
- ggml_type_name(src1->type));
if (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F32) {
ggml_cpy_f32_f32_sycl(src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10,
}
void ggml_sycl_dup(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
- // TODO: why do we pass dst as src1 here?
- GGML_SYCL_DEBUG("[SYCL] call %s\n", __func__);
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
ggml_sycl_cpy(ctx, dst->src[0], dst);
- GGML_SYCL_DEBUG("[SYCL] call %s done\n", __func__);
}
src0->type == GGML_TYPE_Q8_0 || src0->type == GGML_TYPE_F16;
if (src1_convert_f16) {
+ scope_op_debug_print scope_dbg_print(__func__, "/to_fp16_sycl", dst, /*num_src=*/2,
+ " : converting src1 to fp16");
src1_dfloat = src1_dfloat_a.alloc(ne00);
const to_fp16_sycl_t to_fp16_sycl = ggml_get_to_fp16_sycl(src1->type, dst);
GGML_ASSERT(to_fp16_sycl != nullptr);
void ggml_sycl_sqrt(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
- GGML_SYCL_DEBUG("call %s: DST Tensor type: %s\n", __func__, ggml_type_name(dst->type));
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
ggml_sycl_op_sqrt(ctx, dst);
- GGML_SYCL_DEBUG("call %s done\n", __func__);
}
void ggml_sycl_sin(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
- GGML_SYCL_DEBUG("call %s: DST Tensor type: %s\n", __func__, ggml_type_name(dst->type));
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
ggml_sycl_op_sin(ctx, dst);
- GGML_SYCL_DEBUG("call %s done\n", __func__);
}
void ggml_sycl_cos(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
- GGML_SYCL_DEBUG("call %s: DST Tensor type: %s\n", __func__, ggml_type_name(dst->type));
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
ggml_sycl_op_cos(ctx, dst);
- GGML_SYCL_DEBUG("call %s done\n", __func__);
}
void ggml_sycl_acc(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
- GGML_SYCL_DEBUG("call %s: DST Tensor type: %s\n", __func__, ggml_type_name(dst->type));
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/2);
ggml_sycl_op_acc(ctx, dst);
- GGML_SYCL_DEBUG("call %s done\n", __func__);
}
void ggml_sycl_gelu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
- GGML_SYCL_DEBUG("call %s: DST Tensor type: %s\n", __func__, ggml_type_name(dst->type));
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
ggml_sycl_op_gelu(ctx, dst);
- GGML_SYCL_DEBUG("call %s done\n", __func__);
}
void ggml_sycl_silu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
- GGML_SYCL_DEBUG("call %s: DST Tensor type: %s\n", __func__, ggml_type_name(dst->type));
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
ggml_sycl_op_silu(ctx, dst);
- GGML_SYCL_DEBUG("call %s done\n", __func__);
}
void ggml_sycl_gelu_quick(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
- GGML_SYCL_DEBUG("call %s: DST Tensor type: %s\n", __func__, ggml_type_name(dst->type));
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
ggml_sycl_op_gelu_quick(ctx, dst);
- GGML_SYCL_DEBUG("call %s done\n", __func__);
}
void ggml_sycl_tanh(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
- GGML_SYCL_DEBUG("call %s: DST Tensor type: %s\n", __func__, ggml_type_name(dst->type));
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
ggml_sycl_op_tanh(ctx, dst);
- GGML_SYCL_DEBUG("call %s done\n", __func__);
}
void ggml_sycl_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
- GGML_SYCL_DEBUG("call %s: DST Tensor type: %s\n", __func__, ggml_type_name(dst->type));
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
ggml_sycl_op_relu(ctx, dst);
- GGML_SYCL_DEBUG("call %s done\n", __func__);
}
void ggml_sycl_sigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
- GGML_SYCL_DEBUG("call %s: DST Tensor type: %s\n", __func__, ggml_type_name(dst->type));
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
ggml_sycl_op_sigmoid(ctx, dst);
- GGML_SYCL_DEBUG("call %s done\n", __func__);
}
void ggml_sycl_hardsigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
- GGML_SYCL_DEBUG("call %s: DST Tensor type: %s\n", __func__, ggml_type_name(dst->type));
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
ggml_sycl_op_hardsigmoid(ctx, dst);
- GGML_SYCL_DEBUG("call %s done\n", __func__);
}
void ggml_sycl_hardswish(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
- GGML_SYCL_DEBUG("call %s: DST Tensor type: %s\n", __func__, ggml_type_name(dst->type));
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
ggml_sycl_op_hardswish(ctx, dst);
- GGML_SYCL_DEBUG("call %s done\n", __func__);
}
-
void ggml_sycl_exp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
- GGML_SYCL_DEBUG("call %s: DST Tensor type: %s\n", __func__, ggml_type_name(dst->type));
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
ggml_sycl_op_exp(ctx, dst);
- GGML_SYCL_DEBUG("call %s done\n", __func__);
}
void ggml_sycl_log(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
- GGML_SYCL_DEBUG("call %s: DST Tensor type: %s\n", __func__, ggml_type_name(dst->type));
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
ggml_sycl_op_log(ctx, dst);
- GGML_SYCL_DEBUG("call %s done\n", __func__);
}
void ggml_sycl_neg(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
- GGML_SYCL_DEBUG("call %s: DST Tensor type: %s\n", __func__, ggml_type_name(dst->type));
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
ggml_sycl_op_neg(ctx, dst);
- GGML_SYCL_DEBUG("call %s done\n", __func__);
}
void ggml_sycl_step(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
- GGML_SYCL_DEBUG("call %s: DST Tensor type: %s\n", __func__, ggml_type_name(dst->type));
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
ggml_sycl_op_step(ctx, dst);
- GGML_SYCL_DEBUG("call %s done\n", __func__);
}
void ggml_sycl_leaky_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
- GGML_SYCL_DEBUG("call %s: DST Tensor type: %s\n", __func__, ggml_type_name(dst->type));
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
ggml_sycl_op_leaky_relu(ctx, dst);
- GGML_SYCL_DEBUG("call %s done\n", __func__);
}
void ggml_sycl_sqr(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
- GGML_SYCL_DEBUG("call %s: DST Tensor type: %s\n", __func__, ggml_type_name(dst->type));
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
ggml_sycl_op_sqr(ctx, dst);
- GGML_SYCL_DEBUG("call %s done\n", __func__);
}
void ggml_sycl_upscale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
- GGML_SYCL_DEBUG("call %s: DST Tensor type: %s\n", __func__, ggml_type_name(dst->type));
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
ggml_sycl_op_upscale(ctx, dst);
- GGML_SYCL_DEBUG("call %s done\n", __func__);
}
void ggml_sycl_pad(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
- GGML_SYCL_DEBUG("call %s: DST Tensor type: %s\n", __func__, ggml_type_name(dst->type));
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
ggml_sycl_op_pad(ctx, dst);
- GGML_SYCL_DEBUG("call %s done\n", __func__);
}
void ggml_sycl_clamp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
- GGML_SYCL_DEBUG("call %s: DST Tensor type: %s\n", __func__, ggml_type_name(dst->type));
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
ggml_sycl_op_clamp(ctx, dst);
- GGML_SYCL_DEBUG("call %s done\n", __func__);
}
void ggml_sycl_sgn(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
- GGML_SYCL_DEBUG("call %s: DST Tensor type: %s\n", __func__, ggml_type_name(dst->type));
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
ggml_sycl_op_sgn(ctx, dst);
- GGML_SYCL_DEBUG("call %s done\n", __func__);
}
void ggml_sycl_abs(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
- GGML_SYCL_DEBUG("call %s: DST Tensor type: %s\n", __func__, ggml_type_name(dst->type));
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
ggml_sycl_op_abs(ctx, dst);
- GGML_SYCL_DEBUG("call %s done\n", __func__);
}
void ggml_sycl_elu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
- GGML_SYCL_DEBUG("call %s: DST Tensor type: %s\n", __func__, ggml_type_name(dst->type));
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
ggml_sycl_op_elu(ctx, dst);
- GGML_SYCL_DEBUG("call %s done\n", __func__);
}
GGML_UNUSED(ctx);
}
-void ggml_sycl_op_get_rows(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
-
+void ggml_sycl_op_get_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
GGML_ASSERT(dst->src[1]->type == GGML_TYPE_I32);
GGML_ASSERT(dst->type == GGML_TYPE_F32);
GGML_ABORT("fatal error");
}
}
-
static enum ggml_status
ggml_backend_sycl_buffer_init_tensor(ggml_backend_buffer_t buffer,
ggml_tensor *tensor) try {
+ GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
+ debug_print_tensor(": tensor=", tensor, "\n");
ggml_backend_sycl_buffer_context * ctx = (ggml_backend_sycl_buffer_context *)buffer->context;
if (tensor->view_src != NULL) {
ggml_tensor *tensor,
const void *data, size_t offset,
size_t size) try {
-
+ GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
+ debug_print_tensor(": tensor=", tensor);
+ GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
ggml_backend_sycl_buffer_context * ctx = ( ggml_backend_sycl_buffer_context *)buffer->context;
ggml_sycl_set_device(ctx->device);
auto stream = &(dpct::dev_mgr::instance().get_device(ctx->device).default_queue());
const ggml_tensor *tensor,
void *data, size_t offset,
size_t size) try {
-
+ GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
+ debug_print_tensor(": tensor=", tensor);
+ GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
ggml_backend_sycl_buffer_context * ctx = ( ggml_backend_sycl_buffer_context *)buffer->context;
ggml_sycl_set_device(ctx->device);
ggml_backend_sycl_buffer_cpy_tensor(ggml_backend_buffer_t buffer,
const ggml_tensor *src,
ggml_tensor *dst) try {
- if (ggml_backend_buffer_is_sycl(src->buffer)) {
+ bool is_cpy_supported = ggml_backend_buffer_is_sycl(src->buffer);
+ GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
+ debug_print_tensor(": dst=", dst);
+ debug_print_tensor(" src=", src);
+ GGML_SYCL_DEBUG(" is_cpy_supported=%d\n", is_cpy_supported);
+ if (is_cpy_supported) {
ggml_backend_sycl_buffer_context * src_ctx = (ggml_backend_sycl_buffer_context *)src->buffer->context;
ggml_backend_sycl_buffer_context * dst_ctx = (ggml_backend_sycl_buffer_context *)dst->buffer->context;
static void ggml_backend_sycl_buffer_clear(ggml_backend_buffer_t buffer,
uint8_t value) try {
- ggml_backend_sycl_buffer_context * ctx = ( ggml_backend_sycl_buffer_context *)buffer->context;
+ GGML_SYCL_DEBUG("[SYCL] call %s: size=%zu\n", __func__, buffer->size);
+ ggml_backend_sycl_buffer_context * ctx = (ggml_backend_sycl_buffer_context *) buffer->context;
ggml_sycl_set_device(ctx->device);
queue_ptr stream = ctx->stream;
static void ggml_backend_sycl_buffer_memset_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, uint8_t value,
size_t offset, size_t size) {
- GGML_SYCL_DEBUG(" [SYCL] call %s\n", __func__);
+ GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
+ debug_print_tensor(": tensor=", tensor);
+ GGML_SYCL_DEBUG(" size=%zu offset=%zu value=%u\n", size, offset, value);
ggml_backend_sycl_buffer_context * ctx = (ggml_backend_sycl_buffer_context *) buffer->context;
SYCL_CHECK(ggml_sycl_set_device(ctx->device));
auto stream = &(dpct::dev_mgr::instance().get_device(ctx->device).default_queue());
static enum ggml_status
ggml_backend_sycl_split_buffer_init_tensor(ggml_backend_buffer_t buffer,
ggml_tensor *tensor) try {
+ GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
+ debug_print_tensor(": tensor=", tensor, "\n");
GGML_ASSERT(tensor->view_src == nullptr); // views of split tensors are not supported
ggml_backend_sycl_split_buffer_context * ctx = (ggml_backend_sycl_split_buffer_context *)buffer->context;
ggml_backend_sycl_split_buffer_set_tensor(ggml_backend_buffer_t buffer,
ggml_tensor *tensor, const void *data,
size_t offset, size_t size) try {
+ GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
+ debug_print_tensor(": tensor=", tensor);
+ GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
// split tensors must always be set in their entirety at once
GGML_ASSERT(offset == 0);
GGML_ASSERT(size == ggml_nbytes(tensor));
ggml_backend_sycl_split_buffer_get_tensor(ggml_backend_buffer_t buffer,
const ggml_tensor *tensor, void *data,
size_t offset, size_t size) try {
+ GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
+ debug_print_tensor(": tensor=", tensor);
+ GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
// split tensors must always be set in their entirety at once
GGML_ASSERT(offset == 0);
GGML_ASSERT(size == ggml_nbytes(tensor));
#else
bool use_fp16 = false;
#endif
- if ((src0->type == GGML_TYPE_F16 || ggml_is_quantized(src0->type)) &&
- use_fp16 && ggml_is_contiguous(src0) && row_diff == src0->ne[1] &&
- dst->op_params[0] == GGML_PREC_DEFAULT) {
- // GGML_SYCL_DEBUG("ggml_sycl_op_mul_mat_sycl - fp16 path\n");
+ if ((src0->type == GGML_TYPE_F16 || ggml_is_quantized(src0->type)) && use_fp16 && ggml_is_contiguous(src0) &&
+ row_diff == src0->ne[1] && dst->op_params[0] == GGML_PREC_DEFAULT) {
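+        // fp16 path: run the GEMM in half precision, converting operands to fp16 as needed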
ggml_sycl_pool_alloc<sycl::half> src0_as_f16(ctx.pool());
if (src0->type != GGML_TYPE_F16) {
+ scope_op_debug_print scope_dbg_print(__func__, "/to_fp16_sycl", dst, /*num_src=*/2,
+ " : converting src0 to fp16");
const to_fp16_sycl_t to_fp16_sycl = ggml_get_to_fp16_sycl(src0->type, dst);
GGML_ASSERT(to_fp16_sycl != nullptr);
size_t ne = row_diff*ne00;
ggml_sycl_pool_alloc<sycl::half> src1_as_f16(ctx.pool());
if (src1->type != GGML_TYPE_F16) {
+ scope_op_debug_print scope_dbg_print(__func__, "/to_fp16_sycl", dst, /*num_src=*/2,
+ " : converting src1 to fp16");
const to_fp16_sycl_t to_fp16_sycl = ggml_get_to_fp16_sycl(src1->type, dst);
GGML_ASSERT(to_fp16_sycl != nullptr);
size_t ne = src1_ncols*ne10;
DnnlGemmWrapper::row_gemm(ctx, src1_ncols, row_diff, ne10, src1_ptr,
DnnlGemmWrapper::to_dt<sycl::half>(), src0_ptr, DnnlGemmWrapper::to_dt<sycl::half>(),
dst_f16.get(), DnnlGemmWrapper::to_dt<sycl::half>(), stream);
+ scope_op_debug_print scope_dbg_print(__func__, "/to_fp32_sycl", dst, /*num_src=*/2,
+ " : converting dst to fp32");
const to_fp32_sycl_t to_fp32_sycl = ggml_get_to_fp32_sycl(GGML_TYPE_F16, dst);
to_fp32_sycl(dst_f16.get(), dst_dd_i, row_diff* src1_ncols, stream);
}
src1_ptr, dpct::library_data_t::real_half, ne10, &beta_f16,
dst_f16.get(), dpct::library_data_t::real_half, ldc,
dpct::library_data_t::real_half)));
+ scope_op_debug_print scope_dbg_print(__func__, "/to_fp32_sycl", dst, /*num_src=*/2,
+ " : converting dst to fp32");
const to_fp32_sycl_t to_fp32_sycl = ggml_get_to_fp32_sycl(GGML_TYPE_F16, dst);
to_fp32_sycl(dst_f16.get(), dst_dd_i, row_diff*src1_ncols, stream);
}
- }
- else {
- // GGML_SYCL_DEBUG("ggml_sycl_op_mul_mat_sycl - fp32 path\n");
+ } else {
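+        // fp32 path: convert any non-fp32 operands to fp32 and run the GEMM in single precision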
ggml_sycl_pool_alloc<float> src0_ddq_as_f32(ctx.pool());
ggml_sycl_pool_alloc<float> src1_ddq_as_f32(ctx.pool());
if (src0->type != GGML_TYPE_F32) {
+ scope_op_debug_print scope_dbg_print(__func__, "/to_fp32_sycl", dst, /*num_src=*/2,
+ " : converting src0 to fp32");
const to_fp32_sycl_t to_fp32_sycl = ggml_get_to_fp32_sycl(src0->type, dst);
GGML_ASSERT(to_fp32_sycl != nullptr);
src0_ddq_as_f32.alloc(row_diff*ne00);
to_fp32_sycl(src0_dd_i, src0_ddq_as_f32.get(), row_diff*ne00, stream);
}
if (src1->type != GGML_TYPE_F32) {
+ scope_op_debug_print scope_dbg_print(__func__, "/to_fp32_sycl", dst, /*num_src=*/2,
+ " : converting src1 to fp32");
const to_fp32_sycl_t to_fp32_sycl = ggml_get_to_fp32_sycl(src1->type, dst);
GGML_ASSERT(to_fp32_sycl != nullptr);
src1_ddq_as_f32.alloc(src1_ncols*ne10);
std::exit(1);
}
-static void ggml_sycl_op_pool2d(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
-
+static void ggml_sycl_op_pool2d(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
GGML_ASSERT( dst->type == GGML_TYPE_F32);
dpct::queue_ptr main_stream = ctx.stream();
sum_rows_f32_sycl(src0_dd, dst_dd, ne, 1, main_stream);
}
-inline void ggml_sycl_op_sum_rows(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
-
+inline void ggml_sycl_op_sum_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
GGML_ASSERT( dst->type == GGML_TYPE_F32);
dpct::queue_ptr main_stream = ctx.stream();
argsort_f32_i32_sycl(src0_dd, (int *) dst_dd, ncols, nrows, order, main_stream);
}
-inline void ggml_sycl_op_argmax(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
-
+inline void ggml_sycl_op_argmax(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
GGML_ASSERT( dst->type == GGML_TYPE_I32);
argmax_f32_i32_sycl(src0_dd, dst_dd, ncols, nrows, main_stream);
}
-inline void ggml_sycl_op_diag_mask_inf(ggml_backend_sycl_context & ctx,ggml_tensor *dst) {
-
+inline void ggml_sycl_op_diag_mask_inf(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
GGML_ASSERT( dst->type == GGML_TYPE_F32);
dpct::queue_ptr main_stream = ctx.stream();
diag_mask_inf_f32_sycl(src0_dd, dst_dd, ne00, nrows0, ne01, n_past, main_stream);
}
-inline void ggml_sycl_op_scale(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
-
+inline void ggml_sycl_op_scale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
GGML_ASSERT( dst->type == GGML_TYPE_F32);
dpct::queue_ptr main_stream = ctx.stream();
dev[i].src1_ddq = dev[i].src1_ddq_alloc.alloc(ctx.pool(i), nrows1*src1_padded_col_size*q8_1_ts/q8_1_bs);
if (src1_on_device && src1_is_contiguous) {
+ scope_op_debug_print scope_dbg_print(__func__, "/quantize_row_q8_1_sycl", dst,
+ /*num_src=*/2, " : converting src1 to Q8_1");
quantize_row_q8_1_sycl(dev[i].src1_ddf, dev[i].src1_ddq, ne10, nrows1, src1_padded_col_size, stream);
/*
DPCT1010:90: SYCL uses exceptions to report errors and does not
}
if (convert_src1_to_q8_1 && !src1_is_contiguous) {
+ scope_op_debug_print scope_dbg_print(__func__, "/quantize_row_q8_1_sycl", dst,
+ /*num_src=*/2, " : converting src1 to Q8_1");
quantize_row_q8_1_sycl(src1_ddf_i, src1_ddq_i, ne10, src1_ncols, src1_padded_col_size, stream);
/*
DPCT1010:92: SYCL uses exceptions to report errors and does
static void ggml_sycl_get_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
- GGML_SYCL_DEBUG("call %s\n", __func__);
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/2);
ggml_sycl_op_get_rows(ctx, dst);
- GGML_SYCL_DEBUG("call %s done\n", __func__);
}
static void ggml_sycl_norm(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
- GGML_SYCL_DEBUG("call %s\n", __func__);
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
ggml_sycl_op_norm(ctx, dst);
- GGML_SYCL_DEBUG("call %s done\n", __func__);
}
static void ggml_sycl_rms_norm(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
- GGML_SYCL_DEBUG("call %s\n", __func__);
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
ggml_sycl_op_rms_norm(ctx, dst);
- GGML_SYCL_DEBUG("call %s done\n", __func__);
}
static void ggml_sycl_l2_norm(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
- GGML_SYCL_DEBUG("call %s\n", __func__);
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
ggml_sycl_op_l2_norm(ctx, dst);
- GGML_SYCL_DEBUG("call %s done\n", __func__);
}
static void ggml_sycl_group_norm(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
- GGML_SYCL_DEBUG("call %s\n", __func__);
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
ggml_sycl_op_group_norm(ctx, dst);
- GGML_SYCL_DEBUG("call %s done\n", __func__);
}
static void ggml_sycl_mul_mat_vec_p021(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
// convert src1 to fp16
if (src1->type != GGML_TYPE_F16) {
+ scope_op_debug_print scope_dbg_print(__func__, "/to_fp16_nc_sycl", dst, /*num_src=*/2,
+ " : converting src1 to fp16");
const to_fp16_nc_sycl_t to_fp16_nc_sycl = get_to_fp16_nc_sycl(src1->type);
GGML_ASSERT(to_fp16_nc_sycl != nullptr);
const int64_t ne_src1 = ggml_nelements(src1);
}
static void ggml_sycl_mul_mat(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/2);
const bool split = ggml_backend_buffer_is_sycl_split(src0->buffer);
int64_t min_compute_capability = INT_MAX;
constexpr bool convert_src1_to_q8_1 = false;
ggml_sycl_op_mul_mat(ctx, src0, src1, dst, ggml_sycl_op_mul_mat_sycl, convert_src1_to_q8_1);
}
- GGML_SYCL_DEBUG("call %s done\n", __func__);
}
static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx,
ggml_tensor *dst) try {
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/3);
const ggml_tensor *src0 = dst->src[0];
const ggml_tensor *src1 = dst->src[1];
GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(src0->buffer) && "mul_mat_id does not support split buffers");
}
static void ggml_sycl_scale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
ggml_sycl_op_scale(ctx, dst);
}
static void ggml_sycl_diag_mask_inf(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
ggml_sycl_op_diag_mask_inf(ctx, dst);
}
static void ggml_sycl_pool2d(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
ggml_sycl_op_pool2d(ctx, dst);
}
static void ggml_sycl_im2col(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/2);
ggml_sycl_op_im2col(ctx, dst);
}
static void ggml_sycl_sum(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
GGML_ASSERT(ggml_is_contiguous(dst->src[0]));
ggml_sycl_op_sum(ctx, dst);
}
static void ggml_sycl_sum_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
GGML_ASSERT(ggml_is_contiguous(dst->src[0]));
ggml_sycl_op_sum_rows(ctx, dst);
}
static void ggml_sycl_argsort(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
GGML_ASSERT(ggml_is_contiguous(dst->src[0]));
ggml_sycl_op_argsort(ctx, dst);
}
static void ggml_sycl_argmax(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
GGML_ASSERT(ggml_is_contiguous(dst->src[0]));
ggml_sycl_op_argmax(ctx, dst);
}
ggml_tensor *tensor,
const void *data, size_t offset,
size_t size) try {
+ GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
+ debug_print_tensor(": tensor=", tensor);
+ GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
const ggml_tensor *tensor,
void *data, size_t offset,
size_t size) try {
+ GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
+ debug_print_tensor(": tensor=", tensor);
+ GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
const ggml_tensor *src,
ggml_tensor *dst) try {
ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
- if (dst->buffer->buft == ggml_backend_sycl_buffer_type(sycl_ctx->device) && ggml_backend_buffer_is_sycl(src->buffer)) {
+ bool is_cpy_supported = dst->buffer->buft == ggml_backend_sycl_buffer_type(sycl_ctx->device) &&
+ ggml_backend_buffer_is_sycl(src->buffer);
+ GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
+ debug_print_tensor(": dst=", dst);
+ debug_print_tensor(" src=", src);
+ GGML_SYCL_DEBUG(" is_cpy_supported=%d\n", is_cpy_supported);
+ if (is_cpy_supported) {
/*
DPCT1009:215: SYCL uses exceptions to report errors and does not use the
error codes. The original code was commented out and a warning string
}
static void ggml_backend_sycl_synchronize(ggml_backend_t backend) try {
+ GGML_SYCL_DEBUG("[SYCL] call %s\n", __func__);
ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
const queue_ptr stream = sycl_ctx->stream(sycl_ctx->device, 0);
SYCL_CHECK(CHECK_TRY_ERROR((stream)->wait()));
}
static void ggml_backend_sycl_event_wait(ggml_backend_t backend, ggml_backend_event_t event) try {
-
+ GGML_SYCL_DEBUG("[SYCL] call %s\n", __func__);
sycl::event* sycl_event = static_cast<sycl::event*>(event->context);
if (ggml_backend_is_sycl(backend)) {
static void ggml_backend_sycl_device_event_synchronize(ggml_backend_dev_t dev, ggml_backend_event_t event) try {
GGML_UNUSED(dev);
+ GGML_SYCL_DEBUG("[SYCL] call %s\n", __func__);
sycl::event *sycl_event = static_cast<sycl::event *>(event->context);
SYCL_CHECK(CHECK_TRY_ERROR(sycl_event->wait()));
}
void ggml_sycl_op_gated_linear_attn(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/5);
const float * k_d = static_cast<const float *>(dst->src[0]->data);
const float * v_d = static_cast<const float *>(dst->src[1]->data);
const float * r_d = static_cast<const float *>(dst->src[2]->data);
case GGML_TYPE_Q4_K:
if ((ggml_tensor_extra_gpu *) dst->src[0]->extra &&
((ggml_tensor_extra_gpu *) dst->src[0]->extra)->optimized_feature.reorder) {
+ GGML_SYCL_DEBUG("Calling reorder_mul_mat_vec_q4_k_q8_1_sycl\n");
reorder_mul_mat_vec_q4_k_q8_1_sycl(src0_dd_i, src1_ddq_i_bs, dst_dd_i_bs, ne00, row_diff, stream);
} else {
+ GGML_SYCL_DEBUG("Calling mul_mat_vec_q4_K_q8_1_sycl\n");
mul_mat_vec_q4_K_q8_1_sycl(src0_dd_i, src1_ddq_i_bs, dst_dd_i_bs, ne00, row_diff, stream);
}
break;
#include "outprod.hpp"
void ggml_sycl_op_out_prod(ggml_backend_sycl_context& ctx, ggml_tensor* dst) {
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/2);
const ggml_tensor *src0 = dst->src[0];
const ggml_tensor *src1 = dst->src[1];
}
void ggml_sycl_rope(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
- GGML_SYCL_DEBUG("call %s\n", __func__);
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/3);
ggml_sycl_op_rope(ctx, dst);
- GGML_SYCL_DEBUG("call %s done\n", __func__);
}
}
void ggml_sycl_op_soft_max(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
-
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/2);
GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
GGML_ASSERT( dst->type == GGML_TYPE_F32);
if (dst->src[1] && dst->src[1]->type == GGML_TYPE_F16) {
const sycl::half * src1_dd = static_cast<sycl::half *>(dst->src[1]->data);
- GGML_SYCL_DEBUG("%s: F16 mask\n", __func__);
soft_max_f32_sycl<sycl::half>(src0_dd, src1_dd, dst_dd, ne00, nrows_x, nrows_y, scale, max_bias,
main_stream, ctx.device);
} else if (dst->src[1] && dst->src[1]->type == GGML_TYPE_F32) {
const float * src1_dd = static_cast<const float *>(dst->src[1]->data);
- GGML_SYCL_DEBUG("%s: F32 mask\n", __func__);
soft_max_f32_sycl<float>(src0_dd, src1_dd, dst_dd, ne00, nrows_x, nrows_y, scale, max_bias, main_stream, ctx.device);
} else {
/* mask unavailable */
- GGML_SYCL_DEBUG("%s: No mask\n", __func__);
soft_max_f32_sycl<float>(src0_dd, nullptr, dst_dd, ne00, nrows_x, nrows_y, scale, max_bias, main_stream, ctx.device);
}
}
}
void ggml_sycl_op_timestep_embedding(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
- const ggml_tensor *src0 = dst->src[0];
- const ggml_tensor *src1 = dst->src[1];
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
+ const ggml_tensor * src0 = dst->src[0];
const float * src0_d = (const float *)src0->data;
float * dst_d = (float *)dst->data;
dpct::queue_ptr stream = ctx.stream();
const int max_period = dst->op_params[1];
timestep_embedding_f32_sycl(src0_d, dst_d, src0->ne[0], dst->nb[1], dim, max_period, stream);
- GGML_UNUSED(src1);
}
}
void ggml_sycl_op_rwkv_wkv6(ggml_backend_sycl_context& ctx, ggml_tensor* dst) {
-
- const ggml_tensor *src0 = dst->src[0];
- const ggml_tensor *src1 = dst->src[1];
-
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/6);
const float* k_d = (const float*)dst->src[0]->data;
const float* v_d = (const float*)dst->src[1]->data;
const float* r_d = (const float*)dst->src[2]->data;
});
});
}
-
- GGML_UNUSED(src0);
- GGML_UNUSED(src1);
}
void ggml_sycl_op_rwkv_wkv7(ggml_backend_sycl_context& ctx, ggml_tensor* dst) {
-
- const ggml_tensor *src0 = dst->src[0];
- const ggml_tensor *src1 = dst->src[1];
-
+ scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/7);
const float* r_d = (const float*)dst->src[0]->data;
const float* w_d = (const float*)dst->src[1]->data;
const float* k_d = (const float*)dst->src[2]->data;
});
});
}
-
- GGML_UNUSED(src0);
- GGML_UNUSED(src1);
}