llama-fit-params: quality-of-life improvements for prints/errors (#18089)

author Johannes Gäßler <redacted>

Tue, 16 Dec 2025 23:03:19 +0000 (00:03 +0100)

committer GitHub <redacted>

Tue, 16 Dec 2025 23:03:19 +0000 (00:03 +0100)
author Johannes Gäßler <redacted>
Tue, 16 Dec 2025 23:03:19 +0000 (00:03 +0100)
committer GitHub <redacted>
Tue, 16 Dec 2025 23:03:19 +0000 (00:03 +0100)
diff --git a/tools/fit-params/fit-params.cpp b/tools/fit-params/fit-params.cpp

index fbf7a2eb376bd1416c84a06f7aa47c7972493a96..2c113c453e3a6531c5e14ec03848d2959d758fa2 100644 (file)
--- a/tools/fit-params/fit-params.cpp
+++ b/tools/fit-params/fit-params.cpp
@@ -4,7 +4,11 @@
  #include "common.h"
  #include "log.h"
  
-#include <iostream>
+#include <chrono>
+#include <cinttypes>
+#include <thread>
+
+using namespace std::chrono_literals;
  
  #if defined(_MSC_VER)
  #pragma warning(disable: 4244 4267) // possible loss of data
@@ -22,13 +26,17 @@ int main(int argc, char ** argv) {
      llama_numa_init(params.numa);
      auto mparams = common_model_params_to_llama(params);
      auto cparams = common_context_params_to_llama(params);
-    llama_params_fit(params.model.path.c_str(), &mparams, &cparams,
+    const bool success = llama_params_fit(params.model.path.c_str(), &mparams, &cparams,
          params.tensor_split, params.tensor_buft_overrides.data(), params.fit_params_target, params.fit_params_min_ctx,
          params.verbosity >= 4 ? GGML_LOG_LEVEL_DEBUG : GGML_LOG_LEVEL_ERROR);
+    if (!success) {
+        LOG_ERR("%s: failed to fit CLI arguments to free memory, exiting...\n", __func__);
+        exit(1);
+    }
  
-    LOG_INF("Printing fitted CLI arguments to stdout...\n");
-    std::cout << "-c "    << cparams.n_ctx;
-    std::cout << " -ngl " << mparams.n_gpu_layers;
+    LOG_INF("%s: printing fitted CLI arguments to stdout...\n", __func__);
+    std::this_thread::sleep_for(10ms); // to avoid a race between stderr and stdout
+    printf("-c %" PRIu32 " -ngl %" PRIu32, cparams.n_ctx, mparams.n_gpu_layers);
  
      size_t nd = llama_max_devices();
      while (nd > 1 && mparams.tensor_split[nd - 1] == 0.0f) {
@@ -37,26 +45,22 @@ int main(int argc, char ** argv) {
      if (nd > 1) {
          for (size_t id = 0; id < nd; id++) {
              if (id == 0) {
-                std::cout << " -ts ";
+                printf(" -ts ");
              }
-            if (id > 0) {
-                std::cout << ",";
-            }
-            std::cout << mparams.tensor_split[id];
+            printf("%s%" PRIu32, id > 0 ? "," : "", uint32_t(mparams.tensor_split[id]));
          }
      }
  
      const size_t ntbo = llama_max_tensor_buft_overrides();
+    bool any_tbo = false;
      for (size_t itbo = 0; itbo < ntbo && mparams.tensor_buft_overrides[itbo].pattern != nullptr; itbo++) {
          if (itbo == 0) {
-            std::cout << " -ot ";
-        }
-        if (itbo > 0) {
-            std::cout << ",";
+            printf(" -ot \"");
          }
-        std::cout << mparams.tensor_buft_overrides[itbo].pattern << "=" << ggml_backend_buft_name(mparams.tensor_buft_overrides[itbo].buft);
+        printf("%s%s=%s", itbo > 0 ? "," : "", mparams.tensor_buft_overrides[itbo].pattern, ggml_backend_buft_name(mparams.tensor_buft_overrides[itbo].buft));
+        any_tbo = true;
      }
-    std::cout << "\n";
+    printf("%s\n", any_tbo ? "\"" : "");
  
      return 0;
  }
author	Johannes Gäßler <redacted>
	Tue, 16 Dec 2025 23:03:19 +0000 (00:03 +0100)
committer	GitHub <redacted>
	Tue, 16 Dec 2025 23:03:19 +0000 (00:03 +0100)