// Host-memory helper declarations; the definitions are not visible in this excerpt.
// NOTE(review): presumably a pointer obtained from ggml_sycl_host_malloc(size) is
// expected to be released with ggml_sycl_host_free(ptr) -- confirm against the
// definitions, including what is returned when the allocation fails.
void* ggml_sycl_host_malloc(size_t size);
void ggml_sycl_host_free(void* ptr);
-static int g_ggml_sycl_debug = 0;
+extern int g_ggml_sycl_debug;
#define GGML_SYCL_DEBUG(...) \
do { \
if (g_ggml_sycl_debug) \
#include "ggml-sycl/gemm.hpp"
static bool g_sycl_loaded = false;
+int g_ggml_sycl_debug = 0;
static ggml_sycl_device_info ggml_sycl_init() {
ggml_sycl_device_info info = {};
static bool initialized = false;
if (!initialized) {
- GGML_SYCL_DEBUG("[SYCL] call ggml_check_sycl\n");
g_ggml_sycl_debug = get_sycl_env("GGML_SYCL_DEBUG", 0);
+ GGML_SYCL_DEBUG("[SYCL] call ggml_check_sycl\n");
GGML_LOG_INFO("GGML_SYCL_DEBUG: %d\n", g_ggml_sycl_debug);
#if defined(GGML_SYCL_FORCE_MMQ)
GGML_LOG_INFO("GGML_SYCL_FORCE_MMQ: yes\n");
if (dst->src[1] && dst->src[1]->type == GGML_TYPE_F16) {
const sycl::half * src1_dd = static_cast<sycl::half *>(dst->src[1]->data);
+ GGML_SYCL_DEBUG("%s: F16 mask\n", __func__);
soft_max_f32_sycl<sycl::half>(src0_dd, src1_dd, dst_dd, ne00, nrows_x, nrows_y, scale, max_bias,
main_stream, ctx.device);
} else if (dst->src[1] && dst->src[1]->type == GGML_TYPE_F32) {
const float * src1_dd = static_cast<const float *>(dst->src[1]->data);
+ GGML_SYCL_DEBUG("%s: F32 mask\n", __func__);
soft_max_f32_sycl<float>(src0_dd, src1_dd, dst_dd, ne00, nrows_x, nrows_y, scale, max_bias, main_stream, ctx.device);
} else {
/* mask unavailable */
+ GGML_SYCL_DEBUG("%s: No mask\n", __func__);
soft_max_f32_sycl<float>(src0_dd, nullptr, dst_dd, ne00, nrows_x, nrows_y, scale, max_bias, main_stream, ctx.device);
}
}