From: Diego Devesa Date: Thu, 15 May 2025 13:46:55 +0000 (-0700) Subject: llama-bench : fix -ot with dl backends (#13563) X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=6c8b91500e75df6664278d1e9af3e39e8a2fb0d0;p=pkg%2Fggml%2Fsources%2Fllama.cpp llama-bench : fix -ot with dl backends (#13563) --- diff --git a/tools/llama-bench/llama-bench.cpp b/tools/llama-bench/llama-bench.cpp index 53dbdda2..d77c4052 100644 --- a/tools/llama-bench/llama-bench.cpp +++ b/tools/llama-bench/llama-bench.cpp @@ -687,7 +687,7 @@ static cmd_params parse_cmd_params(int argc, char ** argv) { invalid_param = true; break; } - auto value = argv[i]; + auto * value = argv[i]; /* static */ std::map<std::string, ggml_backend_buffer_type_t> buft_list; if (buft_list.empty()) { // enumerate all the devices and add their buffer types to the list @@ -719,7 +719,7 @@ static cmd_params parse_cmd_params(int argc, char ** argv) { // memory leak present in the implementation // over in arg.cpp. Acceptable because we // only parse these args once in this program. 
- auto override_group = value; + auto * override_group = value; if (value[override_group_span_len] == '\0') { value = &value[override_group_span_len]; last_group = true; @@ -730,7 +730,7 @@ static cmd_params parse_cmd_params(int argc, char ** argv) { std::vector<llama_model_tensor_buft_override> group_tensor_buft_overrides{}; auto override_span_len = std::strcspn(override_group, ";"); while (override_span_len > 0) { - auto override = override_group; + auto * override = override_group; if (override_group[override_span_len] != '\0') { override_group[override_span_len] = '\0'; override_group = &override_group[override_span_len + 1]; @@ -743,9 +743,10 @@ static cmd_params parse_cmd_params(int argc, char ** argv) { break; } override[tensor_name_span_len] = '\0'; - auto tensor_name = override; - auto buffer_type = &override[tensor_name_span_len + 1]; + auto * tensor_name = override; + auto * buffer_type = &override[tensor_name_span_len + 1]; if (buft_list.find(buffer_type) == buft_list.end()) { + printf("error: unrecognized buffer type '%s'\n", buffer_type); printf("Available buffer types:\n"); for (const auto & it : buft_list) { printf(" %s\n", ggml_backend_buft_name(it.second)); @@ -1826,10 +1827,11 @@ int main(int argc, char ** argv) { fprintf(stderr, "warning: sanitizer enabled, performance may be affected\n"); #endif - cmd_params params = parse_cmd_params(argc, argv); - // initialize backends ggml_backend_load_all(); + + cmd_params params = parse_cmd_params(argc, argv); + auto * cpu_dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU); if (!cpu_dev) { fprintf(stderr, "%s: error: CPU backend is not loaded\n", __func__);