extern "C" {
#endif
+// device buffer
+GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_zdnn_buffer_type(void);
+
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_zdnn_reg(void);
#ifdef __cplusplus
--- /dev/null
+#ifndef GGML_ZDNN_COMMON_HPP
+#define GGML_ZDNN_COMMON_HPP
+
+#include "ggml.h"
+#include "ggml-impl.h"
+
+#include "zdnn.h"
+
+#include <vector>
+#include <memory>
+
+#define GGML_ZDNN_NAME "zDNN"
+#define GGML_ZDNN_VERSION ZDNN_VERNUM
+
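+// evaluate a zDNN call and abort via GGML_ASSERT if it does not return ZDNN_OK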
+#define ZDNN_CHECK(stmt) \
+ do { \
+ zdnn_status status = (stmt); \
+ GGML_ASSERT(status == ZDNN_OK); \
+ } while (0)
+
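+// per-device state: reference count, NNPA capability flags, and device identification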
+struct ggml_backend_zdnn_device_context {
+ int zdnn_device;
+ int zdnn_device_ref_count;
+
+ bool has_parmblkformat_0;
+ bool has_parmblkformat_1; // checks for z17
+
+ size_t max_size;
+
+ char name[128];
+};
+
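+// per-backend context: the active device index and the graph currently being computed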
+struct ggml_backend_zdnn_context {
+ int device;
+ ggml_cgraph * gf;
+};
+
+struct ggml_backend_zdnn_buffer {
+ void * data;
+ ggml_backend_zdnn_buffer * extra; // for bias, etc.
+ size_t size;
+
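+ // pre_tfm_desc describes the generic (pre-transformed) layout,
+ // tfm_desc the zDNN-internal (transformed) layout backing ztensor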
+ zdnn_tensor_desc pre_tfm_desc;
+ zdnn_tensor_desc tfm_desc;
+ zdnn_ztensor ztensor;
+
+ char name[GGML_MAX_NAME];
+};
+
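+// backing allocation of a ggml buffer plus the zDNN wrappers created for its tensors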
+struct ggml_backend_zdnn_buffer_context {
+ void * all_data;
+ size_t all_size;
+ bool owned;
+
+ int n_buffers;
+ std::vector<std::unique_ptr<ggml_backend_zdnn_buffer>> buffers;
+};
+
+#endif // GGML_ZDNN_COMMON_HPP
+++ /dev/null
-#ifndef GGML_ZDNN_IMPL
-#define GGML_ZDNN_IMPL
-
-#include "zdnn.h"
-#include "ggml.h"
-#include "ggml-zdnn.h"
-
-#include <vector>
-#include <memory>
-#include <vecintrin.h>
-
-#define GGML_ZDNN_NAME "zDNN"
-#define GGML_ZDNN_VERSION ZDNN_VERNUM
-
-#define vec_neg(a) (-(a)) // Vector Negate
-#define vec_add(a, b) ((a) + (b)) // Vector Add
-#define vec_sub(a, b) ((a) - (b)) // Vector Subtract
-#define vec_mul(a, b) ((a) * (b)) // Vector Multiply
-#define vec_div(a, b) ((a) / (b)) // Vector Divide
-#define vec_sl(a, b) ((a) << (b)) // Vector Shift Left
-#define vec_sra(a, b) ((a) >> (b)) // Vector Shift Right
-#define vec_sr(a, b) ((a) >> (b)) // Vector Shift Right Algebraic
-#define vec_slo(a, b) vec_slb(a, (b) << 64) // Vector Shift Left by Octet
-#define vec_sro(a, b) vec_srb(a, (b) << 64) // Vector Shift Right by Octet
-
-#ifndef vec_and
-#define vec_and(a, b) ((a) & (b)) // Vector AND
-#endif
-
-#ifndef vec_or
-#define vec_or(a, b) ((a) | (b)) // Vector OR
-#endif
-
-#ifndef vec_xor
-#define vec_xor(a, b) ((a) ^ (b)) // Vector XOR
-#endif
-
-typedef signed char char8x16_t __attribute__((vector_size(16)));
-typedef unsigned char uchar8x16_t __attribute__((vector_size(16)));
-
-typedef int8_t int8x16_t __attribute__((vector_size(16)));
-typedef int16_t int16x8_t __attribute__((vector_size(16)));
-typedef int32_t int32x4_t __attribute__((vector_size(16)));
-typedef uint8_t uint8x16_t __attribute__((vector_size(16)));
-typedef uint16_t uint16x8_t __attribute__((vector_size(16)));
-typedef uint32_t uint32x4_t __attribute__((vector_size(16)));
-
-typedef float float32x4_t __attribute__((vector_size(16)));
-typedef double double64x2_t __attribute__((vector_size(16)));
-
-typedef signed long long long64x2_t __attribute__((vector_size(16)));
-typedef unsigned long long ulong64x2_t __attribute__((vector_size(16)));
-
-#define ZDNN_CHECK(stmt) \
- do { \
- zdnn_status status = (stmt); \
- GGML_ASSERT(status == ZDNN_OK); \
- } while (0);
-
-struct ggml_backend_zdnn_device_context {
- int zdnn_device;
- int zdnn_device_ref_count;
-
- bool has_parmblkformat_0;
- bool has_parmblkformat_1;
-
- size_t max_size;
-
- char name[128];
-};
-
-struct ggml_backend_zdnn_context {
- int device;
- ggml_cgraph * gf;
-};
-
-struct ggml_backend_zdnn_buffer {
- void * data;
- ggml_backend_zdnn_buffer * extra; // for bias, etc.
- size_t size;
-
- zdnn_tensor_desc pre_tfm_desc;
- zdnn_tensor_desc tfm_desc;
- zdnn_ztensor ztensor;
-
- char name[GGML_MAX_NAME];
-};
-
-struct ggml_backend_zdnn_buffer_context {
- void * all_data;
- size_t all_size;
- bool owned;
-
- int n_buffers;
- std::vector<std::unique_ptr<ggml_backend_zdnn_buffer>> buffers;
-};
-
-#endif // GGML_ZDNN_IMPL
-#include "zdnn.h"
#include "ggml-zdnn.h"
-#include "ggml-zdnn-impl.h"
-
#include "ggml-impl.h"
#include "ggml-backend-impl.h"
+#include "ggml-zdnn/common.hpp"
+#include "ggml-zdnn/mmf.hpp"
+#include "ggml-zdnn/utils.hpp"
+#include "ggml.h"
+
#include <vector>
#include <memory>
-#include <csignal>
+#include <csignal> // raise(SIGTRAP)
#include <unistd.h>
-inline zdnn_data_types ggml_zdnn_type_mapping(ggml_type type) {
- switch (type) {
- case GGML_TYPE_F32:
- return FP32;
- case GGML_TYPE_F16:
- return FP16;
- case GGML_TYPE_BF16:
- return BFLOAT;
- case GGML_TYPE_I8:
- return INT8;
- case GGML_TYPE_I32:
- return INT32;
- case GGML_TYPE_Q8_0:
- return INT8;
- default:
- GGML_ABORT("%s: fatal: unable to determine zTensor data type",
- __func__);
- break;
- }
-}
+static void ggml_zdnn_compute_forward_mul_mat(
+ const ggml_backend_zdnn_context * ctx,
+ ggml_tensor * dst) {
-inline void ggml_zdnn_create_tensor(zdnn_tensor_desc & pre_tfm_desc,
- zdnn_tensor_desc & tfm_desc,
- zdnn_ztensor & ztensor,
- const ggml_tensor * src,
- const int64_t * ne,
- const zdnn_data_layouts layout) {
- zdnn_init_pre_transformed_desc(
- layout,
- ggml_zdnn_type_mapping(src->type),
- &pre_tfm_desc,
- ne[3], ne[2], ne[1], ne[0]
- );
+ const ggml_tensor * src0 = dst->src[0]; // weights
+ const ggml_tensor * src1 = dst->src[1]; // inputs
- ZDNN_CHECK(zdnn_generate_transformed_desc(&pre_tfm_desc, &tfm_desc));
- ZDNN_CHECK(zdnn_init_ztensor_with_malloc(&pre_tfm_desc, &tfm_desc, &ztensor));
+ // TODO: implement support for quantized types
+ // we currently only support f32, f16, and bf16
+ ggml_zdnn_mul_mat_f(ctx, src0, src1, dst);
}
-inline void ggml_zdnn_load_tensor(zdnn_ztensor & ztensor,
- void * buffer) {
- ZDNN_CHECK(zdnn_transform_ztensor(&ztensor, buffer));
-}
+static bool ggml_zdnn_compute_forward(
+ ggml_backend_zdnn_context * ctx,
+ ggml_tensor * dst) {
-inline void ggml_zdnn_init_tensor(ggml_backend_zdnn_buffer * buffer, const ggml_tensor * tensor) {
- switch (tensor->op) {
+ switch (dst->op) {
case GGML_OP_MUL_MAT:
{
- zdnn_init_pre_transformed_desc(
- ZDNN_2D,
- ggml_zdnn_type_mapping(tensor->type),
- &buffer->pre_tfm_desc,
- tensor->ne[1], tensor->ne[0]
- );
+ ggml_zdnn_compute_forward_mul_mat(ctx, dst);
} break;
- default:
- {
- // For 4D tensors, GGML uses NCHW layout. However, because zDNN
- // automatically transforms everything to NHWC, we will use it
- // directly to avoid the performance penalty changing the
- // layout and reshaping the tensor.
- zdnn_init_pre_transformed_desc(
- ZDNN_NHWC,
- ggml_zdnn_type_mapping(tensor->type),
- &buffer->pre_tfm_desc,
- tensor->ne[3], tensor->ne[2], tensor->ne[1], tensor->ne[0]
- );
-
- // TODO: Consider adding a ggml check.
- // TODO: If tensor = 4D, use ZDNN_NCHW by default.
- // TODO: If tensor = 2D, use ZDNN_NHWC by default.
- } break;
- }
-
- ZDNN_CHECK(zdnn_generate_transformed_desc(&buffer->pre_tfm_desc, &buffer->tfm_desc));
- ZDNN_CHECK(zdnn_init_ztensor_with_malloc(&buffer->pre_tfm_desc, &buffer->tfm_desc, &buffer->ztensor));
-}
-
-static void ggml_zdnn_mul_mat_op(ggml_backend_zdnn_context * ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
- GGML_TENSOR_BINARY_OP_LOCALS;
-
- const enum ggml_type type = src0->type;
-
- GGML_ASSERT(ne0 == ne01);
- GGML_ASSERT(ne1 == ne11);
- GGML_ASSERT(ne2 == ne12);
- GGML_ASSERT(ne3 == ne13);
-
- // we don't support permuted src0 or src1
- GGML_ASSERT(nb00 == ggml_type_size(type));
- GGML_ASSERT(nb10 == ggml_type_size(src1->type));
-
- // dst cannot be transposed or permuted
- GGML_ASSERT(nb0 == sizeof(float));
- GGML_ASSERT(nb0 <= nb1);
- GGML_ASSERT(nb1 <= nb2);
- GGML_ASSERT(nb2 <= nb3);
-
- const ggml_tensor * weights = src0;
- const ggml_tensor * inputs = src1;
- ggml_tensor * output = dst;
-
- ggml_backend_zdnn_buffer * weights_extra = (ggml_backend_zdnn_buffer *)weights->extra;
- ggml_backend_zdnn_buffer * inputs_extra = (ggml_backend_zdnn_buffer *)inputs->extra;
- ggml_backend_zdnn_buffer * output_extra = (ggml_backend_zdnn_buffer *)output->extra;
- ggml_backend_zdnn_buffer * bias_extra = (ggml_backend_zdnn_buffer *)output_extra->extra;
-
- const int64_t weights_rows = ne01;
- const int64_t weights_cols = ne00;
- const int64_t inputs_rows = ne11;
- const int64_t inputs_cols = ne10;
-
- assert(inputs_cols == weights_cols);
-
- const int64_t output_rows = ne1;
- const int64_t output_cols = ne0;
-
- // GGML_LOG_INFO("%s: tensor '%s' tensor dimensions: [%ld, %ld, %ld, %ld] pre_tfm_desc dimensions: [%ld, %ld, %ld, %ld]\n",
- // __func__, weights_extra->name,
- // weights->ne[3], weights->ne[2], weights->ne[1], weights->ne[0],
- // weights_extra->pre_tfm_desc.dim1,
- // weights_extra->pre_tfm_desc.dim2,
- // weights_extra->pre_tfm_desc.dim3,
- // weights_extra->pre_tfm_desc.dim4);
-
- // GGML_LOG_INFO("%s: tensor '%s' tensor dimensions: [%ld, %ld, %ld, %ld] pre_tfm_desc dimensions: [%ld, %ld, %ld, %ld]\n",
- // __func__, inputs_extra->name,
- // inputs->ne[3], inputs->ne[2], inputs->ne[1], inputs->ne[0],
- // inputs_extra->pre_tfm_desc.dim1,
- // inputs_extra->pre_tfm_desc.dim2,
- // inputs_extra->pre_tfm_desc.dim3,
- // inputs_extra->pre_tfm_desc.dim4);
-
- GGML_ASSERT(weights_extra->pre_tfm_desc.dim1 == weights->ne[0] && "weights_extra->pre_tfm_desc.dim1 must match weights->ne[0]");
- GGML_ASSERT(weights_extra->pre_tfm_desc.dim2 == weights->ne[1] && "weights_extra->pre_tfm_desc.dim2 must match weights->ne[1]");
- GGML_ASSERT(inputs_extra->pre_tfm_desc.dim1 == inputs->ne[0] && "inputs_extra->pre_tfm_desc.dim1 must match inputs->ne[0]");
- GGML_ASSERT(inputs_extra->pre_tfm_desc.dim2 == inputs->ne[1] && "inputs_extra->pre_tfm_desc.dim2 must match inputs->ne[1]");
-
- ZDNN_CHECK(zdnn_matmul_transpose_op(&inputs_extra->ztensor, &weights_extra->ztensor, &bias_extra->ztensor,
- false, true, MATMUL_OP_ADDITION, &output_extra->ztensor));
- // TODO: Remove in the future as we are currently DLF16 -> FP32 then in the next op, FP32 -> DLF16 again. Inefficient.
- ZDNN_CHECK(zdnn_transform_origtensor(&output_extra->ztensor, output->data));
-
- GGML_UNUSED(ctx);
- GGML_UNUSED(weights_rows);
- GGML_UNUSED(weights_cols);
- GGML_UNUSED(inputs_rows);
- GGML_UNUSED(inputs_cols);
- GGML_UNUSED(output_rows);
- GGML_UNUSED(output_cols);
-}
-
-static void ggml_zdnn_mul_mat_dispatch(ggml_backend_zdnn_context * ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
- // debug helpers
- // GGML_LOG_INFO("%s: use_mul_mat_vec = %d\n", __func__, use_mul_mat_vec);
- // GGML_LOG_INFO("%s: use_mul_mat_vec_q = %d\n", __func__, use_mul_mat_vec_q);
- // GGML_LOG_INFO("%s: use_mul_mat_q = %d\n", __func__, use_mul_mat_q);
- // GGML_LOG_INFO("%s: src0: %8d %8d %8d %8d\n", __func__, src0->ne[0], src0->ne[1], src0->ne[2], src0->ne[3]);
- // GGML_LOG_INFO("%s: %8d %8d %8d %8d\n", __func__, src0->nb[0], src0->nb[1], src0->nb[2], src0->nb[3]);
- // GGML_LOG_INFO("%s: src1: %8d %8d %8d %8d\n", __func__, src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3]);
- // GGML_LOG_INFO("%s: %8d %8d %8d %8d\n", __func__, src1->nb[0], src1->nb[1], src1->nb[2], src1->nb[3]);
- // GGML_LOG_INFO("%s: src0 is contiguous %d, transposed %d, type = %s, name = %s\n", __func__, ggml_is_contiguous(src0), ggml_is_transposed(src0), ggml_type_name(src0->type), src0->name);
- // GGML_LOG_INFO("%s: src1 is contiguous %d, transposed %d, type = %s, name = %s\n", __func__, ggml_is_contiguous(src1), ggml_is_transposed(src1), ggml_type_name(src1->type), src1->name);
-
- ggml_zdnn_mul_mat_op(ctx, src0, src1, dst);
-}
-
-static bool ggml_zdnn_compute_forward(ggml_backend_zdnn_context * ctx, ggml_tensor * dst) {
- switch (dst->op) {
- case GGML_OP_MUL_MAT:
- ggml_zdnn_mul_mat_dispatch(ctx, dst->src[0], dst->src[1], dst);
- break;
-
default:
return false;
}
--- /dev/null
+#include "ggml.h"
+#include "mmf.hpp"
+
+void ggml_zdnn_mul_mat_f(
+ const ggml_backend_zdnn_context * ctx,
+ const ggml_tensor * src0,
+ const ggml_tensor * src1,
+ ggml_tensor * dst) {
+ GGML_TENSOR_BINARY_OP_LOCALS;
+
+ const enum ggml_type type = src0->type;
+
+ GGML_ASSERT(ne0 == ne01);
+ GGML_ASSERT(ne1 == ne11);
+ GGML_ASSERT(ne2 == ne12);
+ GGML_ASSERT(ne3 == ne13);
+
+ // we don't support permuted src0 or src1
+ GGML_ASSERT(nb00 == ggml_type_size(type));
+ GGML_ASSERT(nb10 == ggml_type_size(src1->type));
+
+ // dst cannot be transposed or permuted
+ GGML_ASSERT(nb0 == sizeof(float));
+ GGML_ASSERT(nb0 <= nb1);
+ GGML_ASSERT(nb1 <= nb2);
+ GGML_ASSERT(nb2 <= nb3);
+
+ const ggml_tensor * weights = src0;
+ const ggml_tensor * inputs = src1;
+ ggml_tensor * output = dst;
+
+ ggml_backend_zdnn_buffer * weights_extra = (ggml_backend_zdnn_buffer *)weights->extra;
+ ggml_backend_zdnn_buffer * inputs_extra = (ggml_backend_zdnn_buffer *)inputs->extra;
+ ggml_backend_zdnn_buffer * output_extra = (ggml_backend_zdnn_buffer *)output->extra;
+ ggml_backend_zdnn_buffer * bias_extra = (ggml_backend_zdnn_buffer *)output_extra->extra;
+
+ const int64_t weights_rows = ne01;
+ const int64_t weights_cols = ne00;
+ const int64_t inputs_rows = ne11;
+ const int64_t inputs_cols = ne10;
+
+ GGML_ASSERT(inputs_cols == weights_cols);
+
+ const int64_t output_rows = ne1;
+ const int64_t output_cols = ne0;
+
+ // GGML_LOG_INFO("%s: tensor '%s' tensor dimensions: [%ld, %ld, %ld, %ld] pre_tfm_desc dimensions: [%ld, %ld, %ld, %ld]\n",
+ // __func__, weights_extra->name,
+ // weights->ne[3], weights->ne[2], weights->ne[1], weights->ne[0],
+ // weights_extra->pre_tfm_desc.dim1,
+ // weights_extra->pre_tfm_desc.dim2,
+ // weights_extra->pre_tfm_desc.dim3,
+ // weights_extra->pre_tfm_desc.dim4);
+
+ // GGML_LOG_INFO("%s: tensor '%s' tensor dimensions: [%ld, %ld, %ld, %ld] pre_tfm_desc dimensions: [%ld, %ld, %ld, %ld]\n",
+ // __func__, inputs_extra->name,
+ // inputs->ne[3], inputs->ne[2], inputs->ne[1], inputs->ne[0],
+ // inputs_extra->pre_tfm_desc.dim1,
+ // inputs_extra->pre_tfm_desc.dim2,
+ // inputs_extra->pre_tfm_desc.dim3,
+ // inputs_extra->pre_tfm_desc.dim4);
+
+ GGML_ASSERT(weights_extra->pre_tfm_desc.dim1 == weights->ne[0] && "weights_extra->pre_tfm_desc.dim1 must match weights->ne[0]");
+ GGML_ASSERT(weights_extra->pre_tfm_desc.dim2 == weights->ne[1] && "weights_extra->pre_tfm_desc.dim2 must match weights->ne[1]");
+ GGML_ASSERT(inputs_extra->pre_tfm_desc.dim1 == inputs->ne[0] && "inputs_extra->pre_tfm_desc.dim1 must match inputs->ne[0]");
+ GGML_ASSERT(inputs_extra->pre_tfm_desc.dim2 == inputs->ne[1] && "inputs_extra->pre_tfm_desc.dim2 must match inputs->ne[1]");
+
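+ // output = inputs x weights^T + bias (transpose_a = false, transpose_b = true),
+ // with the bias folded in via MATMUL_OP_ADDITION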
+ ZDNN_CHECK(zdnn_matmul_transpose_op(&inputs_extra->ztensor, &weights_extra->ztensor, &bias_extra->ztensor,
+ false, true, MATMUL_OP_ADDITION, &output_extra->ztensor));
+ // TODO: Remove this transform eventually: we currently convert DLF16 -> FP32 here, only for the next op to convert FP32 -> DLF16 again, which is inefficient.
+ ZDNN_CHECK(zdnn_transform_origtensor(&output_extra->ztensor, output->data));
+
+ GGML_UNUSED(ctx);
+ GGML_UNUSED(weights_rows);
+ GGML_UNUSED(weights_cols);
+ GGML_UNUSED(inputs_rows);
+ GGML_UNUSED(inputs_cols);
+ GGML_UNUSED(output_rows);
+ GGML_UNUSED(output_cols);
+}
--- /dev/null
+#ifndef GGML_ZDNN_MMF_HPP
+#define GGML_ZDNN_MMF_HPP
+
+#include "common.hpp"
+
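+// matrix multiplication for float types (f32/f16/bf16) via zDNN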
+void ggml_zdnn_mul_mat_f(
+ const ggml_backend_zdnn_context * ctx,
+ const ggml_tensor * src0,
+ const ggml_tensor * src1,
+ ggml_tensor * dst);
+
+#endif // GGML_ZDNN_MMF_HPP
--- /dev/null
+#include "ggml.h"
+#include "utils.hpp"
+
+zdnn_data_types ggml_zdnn_type_mapping(ggml_type type) {
+ switch (type) {
+ case GGML_TYPE_F32:
+ return FP32;
+ case GGML_TYPE_F16:
+ return FP16;
+ case GGML_TYPE_BF16:
+ return BFLOAT;
+ case GGML_TYPE_Q8_0:
+ return INT8;
+ case GGML_TYPE_I8:
+ return INT8;
+ case GGML_TYPE_I32:
+ return INT32;
+ default:
+ GGML_ABORT("%s: fatal: unable to determine zTensor data type",
+ __func__);
+ break;
+ }
+}
+
+void ggml_zdnn_create_tensor(zdnn_tensor_desc & pre_tfm_desc,
+ zdnn_tensor_desc & tfm_desc,
+ zdnn_ztensor & ztensor,
+ const ggml_tensor * src,
+ const int64_t * ne,
+ const zdnn_data_layouts layout) {
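+ // zDNN expects dimensions outermost-first, so ggml's ne[] is passed as ne[3] .. ne[0]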
+ zdnn_init_pre_transformed_desc(
+ layout,
+ ggml_zdnn_type_mapping(src->type),
+ &pre_tfm_desc,
+ ne[3], ne[2], ne[1], ne[0]
+ );
+
+ ZDNN_CHECK(zdnn_generate_transformed_desc(&pre_tfm_desc, &tfm_desc));
+ ZDNN_CHECK(zdnn_init_ztensor_with_malloc(&pre_tfm_desc, &tfm_desc, &ztensor));
+}
+
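+// transform the raw (pre-transformed) data in buffer into ztensor's zDNN-internal layout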
+void ggml_zdnn_load_tensor(zdnn_ztensor & ztensor, void * buffer) {
+ ZDNN_CHECK(zdnn_transform_ztensor(&ztensor, buffer));
+}
+
+void ggml_zdnn_init_tensor(ggml_backend_zdnn_buffer * buffer, const ggml_tensor * tensor) {
+ switch (tensor->op) {
+ case GGML_OP_MUL_MAT:
+ {
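+ // matmul operands are plain 2D matrices: (ne[1], ne[0]) = (rows, cols)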
+ zdnn_init_pre_transformed_desc(
+ ZDNN_2D,
+ ggml_zdnn_type_mapping(tensor->type),
+ &buffer->pre_tfm_desc,
+ tensor->ne[1], tensor->ne[0]
+ );
+ } break;
+
+ default:
+ {
+ // For 4D tensors, GGML uses NCHW layout. However, because zDNN
+ // automatically transforms everything to NHWC, we use it directly
+ // to avoid the performance penalty of changing the layout and
+ // reshaping the tensor.
+ zdnn_init_pre_transformed_desc(
+ ZDNN_NHWC,
+ ggml_zdnn_type_mapping(tensor->type),
+ &buffer->pre_tfm_desc,
+ tensor->ne[3], tensor->ne[2], tensor->ne[1], tensor->ne[0]
+ );
+
+ // TODO: Consider adding a ggml check.
+ // TODO: If tensor = 4D, use ZDNN_NCHW by default.
+ // TODO: If tensor = 2D, use ZDNN_NHWC by default.
+ } break;
+ }
+
+ ZDNN_CHECK(zdnn_generate_transformed_desc(&buffer->pre_tfm_desc, &buffer->tfm_desc));
+ ZDNN_CHECK(zdnn_init_ztensor_with_malloc(&buffer->pre_tfm_desc, &buffer->tfm_desc, &buffer->ztensor));
+}
--- /dev/null
+#ifndef GGML_ZDNN_UTILITIES_HPP
+#define GGML_ZDNN_UTILITIES_HPP
+
+#include "common.hpp"
+
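+// map a ggml_type to the corresponding zdnn_data_types value (aborts on unsupported types)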
+zdnn_data_types ggml_zdnn_type_mapping(ggml_type type);
+
+void ggml_zdnn_create_tensor(zdnn_tensor_desc & pre_tfm_desc,
+ zdnn_tensor_desc & tfm_desc,
+ zdnn_ztensor & ztensor,
+ const ggml_tensor * src,
+ const int64_t * ne,
+ const zdnn_data_layouts layout);
+
+void ggml_zdnn_load_tensor(zdnn_ztensor & ztensor, void * buffer);
+
+void ggml_zdnn_init_tensor(ggml_backend_zdnn_buffer * buffer, const ggml_tensor * tensor);
+
+#endif // GGML_ZDNN_UTILITIES_HPP