#include <future>
#include <vector>
+#include <cstring>
#if defined(GGML_USE_ACCELERATE)
# include <Accelerate/Accelerate.h>
#endif
};
-// helper function to determine if it is better to use BLAS or not
-// for large matrices, BLAS is faster
-static bool ggml_backend_blas_use_blas(const struct ggml_tensor * dst) {
- const struct ggml_tensor * src0 = dst->src[0];
- const struct ggml_tensor * src1 = dst->src[1];
-
- const int64_t ne10 = src1->ne[0];
-
- const int64_t ne0 = dst->ne[0];
- const int64_t ne1 = dst->ne[1];
-
- // TODO: find the optimal values for these
- if (ggml_is_contiguous(src0) &&
- ggml_is_contiguous(src1) &&
- src1->type == GGML_TYPE_F32 &&
- (ne0 >= 32 && ne1 >= 32 && ne10 >= 32)) {
-
- /*printf("BLAS: %d %d %d %d %d\n", ne0, ne1, ne10, ne00, ne01);*/
- return true;
- }
-
- return false;
-}
-
static void ggml_backend_blas_mul_mat(ggml_backend_blas_context * ctx, struct ggml_tensor * dst) {
const struct ggml_tensor * src0 = dst->src[0];
const struct ggml_tensor * src1 = dst->src[1];
// backend interface
-static const char * ggml_backend_blas_name(ggml_backend_t backend) {
+static const char * ggml_backend_blas_get_name(ggml_backend_t backend) {
return "BLAS";
GGML_UNUSED(backend);
GGML_UNUSED(backend);
}
-static bool ggml_backend_blas_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
- const struct ggml_tensor * src0 = op->src[0];
- const struct ggml_tensor * src1 = op->src[1];
-
- return (op->op == GGML_OP_MUL_MAT && ggml_backend_blas_use_blas(op)) ||
- (op->op == GGML_OP_OUT_PROD && op->src[0]->type == GGML_TYPE_F32 &&
- op->src[1]->type == GGML_TYPE_F32 &&
- ggml_is_matrix(src0) &&
- ggml_is_matrix(src1) &&
- ggml_is_contiguous(src0) &&
- (ggml_is_contiguous(src1) || ggml_is_transposed(src1)));
-
- GGML_UNUSED(backend);
-}
-
-static bool ggml_backend_blas_supports_buft(ggml_backend_t backend, ggml_backend_buffer_type_t buft) {
- return ggml_backend_buft_is_host(buft);
-
- GGML_UNUSED(backend);
-}
-
static struct ggml_backend_i blas_backend_i = {
- /* .get_name = */ ggml_backend_blas_name,
+ /* .get_name = */ ggml_backend_blas_get_name,
/* .free = */ ggml_backend_blas_free,
/* .get_default_buffer_type = */ ggml_backend_blas_get_default_buffer_type,
/* .set_tensor_async = */ NULL,
/* .graph_plan_update = */ NULL,
/* .graph_plan_compute = */ NULL,
/* .graph_compute = */ ggml_backend_blas_graph_compute,
- /* .supports_op = */ ggml_backend_blas_supports_op,
- /* .supports_buft = */ ggml_backend_blas_supports_buft,
+ /* .supports_op = */ NULL,
+ /* .supports_buft = */ NULL,
/* .offload_op = */ NULL,
/* .event_record = */ NULL,
/* .event_wait = */ NULL,
ggml_backend_t backend = new ggml_backend {
/* .guid = */ ggml_backend_blas_guid(),
/* .interface = */ blas_backend_i,
- /* .device = */ nullptr,
+ /* .device = */ ggml_backend_reg_dev_get(ggml_backend_blas_reg(), 0),
/* .context = */ ctx,
};
ggml_backend_blas_context * ctx = (ggml_backend_blas_context *)backend_blas->context;
ctx->n_threads = n_threads;
}
+
+// device interface
+
+// returns the short name of the device ("BLAS"); dev is unused
+static const char * ggml_backend_blas_device_get_name(ggml_backend_dev_t dev) {
+    return "BLAS";
+
+    GGML_UNUSED(dev);
+}
+
+// returns a human-readable description of the BLAS implementation
+// that the backend was compiled against, detected at build time
+static const char * ggml_backend_blas_device_get_description(ggml_backend_dev_t dev) {
+    #if defined(GGML_USE_ACCELERATE)
+        return "Accelerate";
+    #elif defined(GGML_BLAS_USE_MKL)
+        return "MKL";
+    #elif defined(GGML_BLAS_USE_BLIS)
+        return "BLIS";
+    #elif defined(GGML_BLAS_USE_NVPL)
+        return "NVPL";
+    #elif defined(OPENBLAS_VERSION)
+        return "OpenBLAS";
+    #else
+        return "BLAS";
+    #endif
+
+    GGML_UNUSED(dev);
+}
+
+// report free/total memory for this device; no per-device accounting is
+// implemented yet, so both values are reported as zero
+static void ggml_backend_blas_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
+    // TODO
+    *free = 0;
+    *total = 0;
+
+    GGML_UNUSED(dev);
+}
+
+// the BLAS backend is reported as a CPU device (it uses the CPU buffer
+// type — see ggml_backend_blas_device_get_buffer_type)
+static enum ggml_backend_dev_type ggml_backend_blas_device_get_type(ggml_backend_dev_t dev) {
+    return GGML_BACKEND_DEVICE_TYPE_CPU;
+
+    GGML_UNUSED(dev);
+}
+
+// fill in the full property set by delegating to the individual getters;
+// only buffer_from_host_ptr is advertised (no async ops, no dedicated
+// host buffer type, no events)
+static void ggml_backend_blas_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
+    props->name        = ggml_backend_blas_device_get_name(dev);
+    props->description = ggml_backend_blas_device_get_description(dev);
+    props->type        = ggml_backend_blas_device_get_type(dev);
+    ggml_backend_blas_device_get_memory(dev, &props->memory_free, &props->memory_total);
+    props->caps = {
+        /* .async                 = */ false,
+        /* .host_buffer           = */ false,
+        /* .buffer_from_host_ptr  = */ true,
+        /* .events                = */ false,
+    };
+}
+
+// create a new backend instance for this device; params are currently ignored
+static ggml_backend_t ggml_backend_blas_device_init(ggml_backend_dev_t dev, const char * params) {
+    return ggml_backend_blas_init();
+
+    GGML_UNUSED(dev);
+    GGML_UNUSED(params);
+}
+
+// tensors for this backend are allocated with the regular CPU buffer type
+static ggml_backend_buffer_type_t ggml_backend_blas_device_get_buffer_type(ggml_backend_dev_t dev) {
+    return ggml_backend_cpu_buffer_type();
+
+    GGML_UNUSED(dev);
+}
+
+// wrap an existing host allocation as a CPU backend buffer;
+// max_tensor_size is not needed for host memory and is ignored
+static ggml_backend_buffer_t ggml_backend_blas_device_buffer_from_ptr(ggml_backend_dev_t dev, void * ptr, size_t size, size_t max_tensor_size) {
+    return ggml_backend_cpu_buffer_from_ptr(ptr, size);
+
+    GGML_UNUSED(dev);
+    GGML_UNUSED(max_tensor_size);
+}
+
+// check whether this backend can run the given operation:
+// - view-like ops are always accepted (no work to do)
+// - MUL_MAT only when both sources are contiguous, src1 is F32, and the
+//   matrices are large enough that BLAS is expected to beat the CPU path
+// - OUT_PROD only for F32 2D matrices, src0 contiguous and src1 either
+//   contiguous or transposed
+static bool ggml_backend_blas_device_supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) {
+    const struct ggml_tensor * src0 = op->src[0];
+    const struct ggml_tensor * src1 = op->src[1];
+
+    switch (op->op) {
+        case GGML_OP_NONE:
+        case GGML_OP_RESHAPE:
+        case GGML_OP_VIEW:
+        case GGML_OP_PERMUTE:
+        case GGML_OP_TRANSPOSE:
+            return true;
+
+        case GGML_OP_MUL_MAT:
+            {
+                // BLAS usually is only faster for large matrices
+                const int64_t ne10 = src1->ne[0];
+
+                const int64_t ne0 = op->ne[0];
+                const int64_t ne1 = op->ne[1];
+
+                // TODO: find the optimal value
+                const int64_t min_batch = 32;
+
+                return (ggml_is_contiguous(src0) &&
+                        ggml_is_contiguous(src1) &&
+                        src1->type == GGML_TYPE_F32 &&
+                        (ne0 >= min_batch && ne1 >= min_batch && ne10 >= min_batch));
+            }
+
+        case GGML_OP_OUT_PROD:
+            return (op->src[0]->type == GGML_TYPE_F32 &&
+                    op->src[1]->type == GGML_TYPE_F32 &&
+                    ggml_is_matrix(src0) &&
+                    ggml_is_matrix(src1) &&
+                    ggml_is_contiguous(src0) &&
+                    (ggml_is_contiguous(src1) || ggml_is_transposed(src1)));
+
+        default:
+            return false;
+    }
+
+    GGML_UNUSED(dev);
+}
+
+// the backend can operate on any buffer type whose data lives in host memory
+static bool ggml_backend_blas_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
+    return ggml_backend_buft_is_host(buft);
+
+    GGML_UNUSED(dev);
+}
+
+// device interface vtable for the single BLAS device; optional
+// capabilities (dedicated host buffer type, offload, events) are
+// not implemented and left NULL
+static const struct ggml_backend_device_i ggml_backend_blas_device_i = {
+    /* .get_name             = */ ggml_backend_blas_device_get_name,
+    /* .get_description      = */ ggml_backend_blas_device_get_description,
+    /* .get_memory           = */ ggml_backend_blas_device_get_memory,
+    /* .get_type             = */ ggml_backend_blas_device_get_type,
+    /* .get_props            = */ ggml_backend_blas_device_get_props,
+    /* .init_backend         = */ ggml_backend_blas_device_init,
+    /* .get_buffer_type      = */ ggml_backend_blas_device_get_buffer_type,
+    /* .get_host_buffer_type = */ NULL,
+    /* .buffer_from_host_ptr = */ ggml_backend_blas_device_buffer_from_ptr,
+    /* .supports_op          = */ ggml_backend_blas_device_supports_op,
+    /* .supports_buft        = */ ggml_backend_blas_device_supports_buft,
+    /* .offload_op           = */ NULL,
+    /* .event_new            = */ NULL,
+    /* .event_free           = */ NULL,
+    /* .event_synchronize    = */ NULL,
+};
+
+// backend reg interface
+
+// returns the name of the backend registry ("BLAS")
+static const char * ggml_backend_blas_reg_get_name(ggml_backend_reg_t reg) {
+    return "BLAS";
+
+    GGML_UNUSED(reg);
+}
+
+// the BLAS backend always exposes exactly one (host) device
+static size_t ggml_backend_blas_reg_get_device_count(ggml_backend_reg_t reg) {
+    return 1;
+
+    GGML_UNUSED(reg);
+}
+
+// returns the single BLAS device; only index 0 is valid
+static ggml_backend_dev_t ggml_backend_blas_reg_get_device(ggml_backend_reg_t reg, size_t index) {
+    GGML_ASSERT(index == 0);
+
+    // shared device instance; the reg pointer is captured on the first
+    // call (every caller passes the same registry, so this is safe)
+    static ggml_backend_device ggml_backend_blas_device = {
+        /* .iface = */ ggml_backend_blas_device_i,
+        /* .reg = */ reg,
+        /* .context = */ nullptr,
+    };
+
+    return &ggml_backend_blas_device;
+
+    GGML_UNUSED(index);
+}
+
+// resolve optional backend-specific functions by name; currently only
+// "ggml_backend_set_n_threads" is exposed, anything else yields NULL
+static void * ggml_backend_blas_get_proc_address(ggml_backend_reg_t reg, const char * name) {
+    if (std::strcmp(name, "ggml_backend_set_n_threads") == 0) {
+        return (void *)ggml_backend_blas_set_n_threads;
+    }
+    return NULL;
+
+    GGML_UNUSED(reg);
+}
+
+// registry interface vtable for the BLAS backend
+static const struct ggml_backend_reg_i ggml_backend_blas_reg_i = {
+    /* .get_name         = */ ggml_backend_blas_reg_get_name,
+    /* .get_device_count = */ ggml_backend_blas_reg_get_device_count,
+    /* .get_device       = */ ggml_backend_blas_reg_get_device,
+    /* .get_proc_address = */ ggml_backend_blas_get_proc_address,
+};
+
+// public entry point: returns the lazily-initialized singleton registry
+// for the BLAS backend
+ggml_backend_reg_t ggml_backend_blas_reg(void) {
+    static struct ggml_backend_reg ggml_backend_blas_reg = {
+        /* .iface = */ ggml_backend_blas_reg_i,
+        /* .context = */ NULL,
+    };
+
+    return &ggml_backend_blas_reg;
+}