return false;
}
+ ggml_log_set(ggml_log_callback_default, nullptr);
+
auto & ctx = model.ctx_w;
// create the ggml context
#ifdef GGML_USE_METAL
if (n_gpu_layers > 0) {
fprintf(stderr, "%s: using Metal backend\n", __func__);
- ggml_backend_metal_log_set_callback(ggml_log_callback_default, nullptr);
model.backend = ggml_backend_metal_init();
if (!model.backend) {
fprintf(stderr, "%s: ggml_backend_metal_init() failed\n", __func__);
printf("%s: backend buffer size = %6.2f MB\n", __func__, buffer_size/(1024.0*1024.0));
}
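Note: the pattern in these hunks replaces the Metal-specific ggml_backend_metal_log_set_callback with the global ggml_log_set, which installs one process-wide callback for log messages from every backend. A minimal sketch of such a callback, assuming only the public ggml_log_callback signature from ggml.h (the actual ggml_log_callback_default used by the examples may differ):

    #include <stdio.h>
    #include "ggml.h"

    // forward every ggml log message to stderr, ignoring the level and user data
    static void ggml_log_callback_default(enum ggml_log_level level, const char * text, void * user_data) {
        (void) level;
        (void) user_data;
        fputs(text, stderr);
        fflush(stderr);
    }

    // installed once, before any backend is initialized
    ggml_log_set(ggml_log_callback_default, nullptr);
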
+ ggml_log_set(ggml_log_callback_default, nullptr);
+
// create the ggml context
{
size_t n_tensors = 2 + 6 + 12*model.hparams.n_layer;
#ifdef GGML_USE_METAL
if (n_gpu_layers > 0) {
fprintf(stderr, "%s: using Metal backend\n", __func__);
- ggml_backend_metal_log_set_callback(ggml_log_callback_default, nullptr);
model.backend = ggml_backend_metal_init();
if (!model.backend) {
fprintf(stderr, "%s: ggml_backend_metal_init() failed\n", __func__);
void init_backends(gpt2_model & model, const gpt_params & params) {
ggml_backend_t gpu_backend = NULL;
+ ggml_log_set(ggml_log_callback_default, nullptr);
+
// initialize the backends
#ifdef GGML_USE_CUDA
if (params.n_gpu_layers > 0) {
#ifdef GGML_USE_METAL
if (params.n_gpu_layers > 0) {
fprintf(stderr, "%s: using Metal backend\n", __func__);
- ggml_backend_metal_log_set_callback(ggml_log_callback_default, nullptr);
gpu_backend = ggml_backend_metal_init();
if (!gpu_backend) {
fprintf(stderr, "%s: ggml_backend_metal_init() failed\n", __func__);
ggml_backend_buffer_t buf_compute = nullptr;
mnist_model(const std::string & backend_name) {
- const size_t backend_index = ggml_backend_reg_find_by_name(backend_name.c_str());
- if (backend_index == SIZE_MAX) {
+ const ggml_backend_reg_t reg = ggml_backend_reg_by_name(backend_name.c_str());
+ if (reg == nullptr) {
fprintf(stderr, "%s: ERROR: backend %s not found, available:\n", __func__, backend_name.c_str());
- for (size_t i = 0; i < ggml_backend_reg_get_count(); ++i) {
- fprintf(stderr, " - %s\n", ggml_backend_reg_get_name(i));
+ for (size_t i = 0; i < ggml_backend_reg_count(); ++i) {
+ fprintf(stderr, " - %s\n", ggml_backend_reg_name(ggml_backend_reg_get(i)));
}
exit(1);
}
fprintf(stderr, "%s: using %s backend\n", __func__, backend_name.c_str());
- backend = ggml_backend_reg_init_backend(backend_index, nullptr);
+
+ backend = ggml_backend_init_by_name(backend_name.c_str(), NULL);
if (ggml_backend_is_cpu(backend)) {
const int ncores_logical = std::thread::hardware_concurrency();
ggml_backend_cpu_set_n_threads(backend, std::min(ncores_logical, (ncores_logical + 4)/2));
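For reference, the mnist hunk above moves from the index-based registry (ggml_backend_reg_get_count / ggml_backend_reg_init_backend) to the handle-based API. A standalone sketch of the new lookup-and-init pattern using only calls from ggml-backend.h; pick_backend is a hypothetical helper, and the "CPU" fallback assumes the CPU backend registers under that name:

    #include <stdio.h>
    #include "ggml-backend.h"

    // list the registered backends, then initialize the requested one by name,
    // falling back to the CPU backend if it is not available
    static ggml_backend_t pick_backend(const char * name) {
        for (size_t i = 0; i < ggml_backend_reg_count(); ++i) {
            fprintf(stderr, "registered backend: %s\n", ggml_backend_reg_name(ggml_backend_reg_get(i)));
        }
        ggml_backend_t backend = ggml_backend_init_by_name(name, NULL);
        if (backend == NULL) {
            backend = ggml_backend_init_by_name("CPU", NULL);
        }
        return backend;
    }
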
// initialize the tensors of the model in this case two matrices 2x2
void load_model(simple_model & model, float * a, float * b, int rows_A, int cols_A, int rows_B, int cols_B) {
+ ggml_log_set(ggml_log_callback_default, nullptr);
+
// initialize the backend
#ifdef GGML_USE_CUDA
fprintf(stderr, "%s: using CUDA backend\n", __func__);
#ifdef GGML_USE_METAL
fprintf(stderr, "%s: using Metal backend\n", __func__);
- ggml_backend_metal_log_set_callback(ggml_log_callback_default, nullptr);
model.backend = ggml_backend_metal_init();
if (!model.backend) {
fprintf(stderr, "%s: ggml_backend_metal_init() failed\n", __func__);
int main() {
// enumerate backends
- printf("Testing %zu backends\n\n", ggml_backend_reg_get_count());
+ printf("Testing %zu backends\n\n", ggml_backend_reg_count());
- for (size_t i = 0; i < ggml_backend_reg_get_count(); i++) {
- printf("Backend %zu/%zu (%s)\n", i + 1, ggml_backend_reg_get_count(), ggml_backend_reg_get_name(i));
+ for (size_t i = 0; i < ggml_backend_reg_count(); i++) {
+ const char * name = ggml_backend_reg_name(ggml_backend_reg_get(i));
+ printf("Backend %zu/%zu (%s)\n", i + 1, ggml_backend_reg_count(), name);
- ggml_backend_t backend = ggml_backend_reg_init_backend(i, NULL);
+ ggml_backend_t backend = ggml_backend_init_by_name(name, NULL);
GGML_ASSERT(backend != NULL);
printf(" Backend name: %s\n", ggml_backend_name(backend));
- test_buffer(backend, ggml_backend_reg_get_default_buffer_type(i));
+ test_buffer(backend, ggml_backend_get_default_buffer_type(backend));
ggml_backend_free(backend);
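With the registry index gone, the default buffer type is now taken directly from the backend handle. The sketch below only illustrates what a buffer check against that type could look like; check_default_buffer is a hypothetical helper, not the test's actual test_buffer:

    // query the backend's default buffer type, allocate a small buffer from it and free it
    static void check_default_buffer(ggml_backend_t backend) {
        ggml_backend_buffer_type_t buft = ggml_backend_get_default_buffer_type(backend);
        printf("  default buffer type: %s\n", ggml_backend_buft_name(buft));

        ggml_backend_buffer_t buf = ggml_backend_buft_alloc_buffer(buft, 1024);
        GGML_ASSERT(buf != NULL);
        printf("  allocated %zu bytes\n", ggml_backend_buffer_get_size(buf));
        ggml_backend_buffer_free(buf);
    }
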
buffer_size += 16 * 32 * 32 * ggml_type_size(GGML_TYPE_F32); // tensor a_4
buffer_size += 197 * 32 * ggml_type_size(GGML_TYPE_F32); // tensor b_4
-
-
-
-
buffer_size += 1024;
}
/*.no_alloc =*/ true,
};
+ ggml_log_set(ggml_log_callback_default, nullptr);
+
// initialize the backend
#ifdef GGML_USE_CUDA
if (use_gpu) {
#ifdef GGML_USE_METAL
if (use_gpu) {
fprintf(stderr, "%s: using Metal backend\n", __func__);
- ggml_backend_metal_log_set_callback(ggml_log_callback_default, nullptr);
model.backend = ggml_backend_metal_init();
if (!model.backend) {
fprintf(stderr, "%s: ggml_backend_metal_init() failed\n", __func__);
printf("%s: ggml tensor size = %d bytes\n", __func__, (int) sizeof(ggml_tensor));
printf("%s: backend buffer size = %0.2f MB\n", __func__, (buffer_size/ 1024.f/ 1024.f));
+ ggml_log_set(ggml_log_callback_default, nullptr);
+
int num_tensors = 2;
struct ggml_init_params params {
/*.mem_size =*/ ggml_tensor_overhead() * num_tensors,
#ifdef GGML_USE_METAL
if (use_gpu) {
fprintf(stderr, "%s: using Metal backend\n", __func__);
- ggml_backend_metal_log_set_callback(ggml_log_callback_default, nullptr);
model.backend = ggml_backend_metal_init();
if (!model.backend) {
fprintf(stderr, "%s: ggml_backend_metal_init() failed\n", __func__);
/*.no_alloc =*/ true,
};
+ ggml_log_set(ggml_log_callback_default, nullptr);
+
// initialize the backend
#ifdef GGML_USE_CUDA
if (use_gpu) {
#ifdef GGML_USE_METAL
if (use_gpu) {
fprintf(stderr, "%s: using Metal backend\n", __func__);
- ggml_backend_metal_log_set_callback(ggml_log_callback_default, nullptr);
model.backend = ggml_backend_metal_init();
if (!model.backend) {
fprintf(stderr, "%s: ggml_backend_metal_init() failed\n", __func__);