vulkan: use a more appropriate number of threads when generating shaders (llama/16418)

author Eve <redacted>

Sat, 4 Oct 2025 20:04:27 +0000 (20:04 +0000)

committer Georgi Gerganov <redacted>

Sun, 12 Oct 2025 08:16:23 +0000 (11:16 +0300)
author Eve <redacted>
Sat, 4 Oct 2025 20:04:27 +0000 (20:04 +0000)
committer Georgi Gerganov <redacted>
Sun, 12 Oct 2025 08:16:23 +0000 (11:16 +0300)
diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp b/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp

index e2726f1fa12b97f074718b7d050a113242807beb..f0cc24ff31e1efc3e1778d3b3b8589e2b8183439 100644 (file)
--- a/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp
+++ b/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp
@@ -1,5 +1,3 @@
-
-
  #include <iostream>
  #include <fstream>
  #include <sstream>
@@ -22,6 +20,7 @@
  #include <sys/types.h>
  
  #ifdef _WIN32
+    #define NOMINMAX
      #include <windows.h>
      #include <direct.h> // For _mkdir on Windows
  #else
@@ -306,7 +305,7 @@ using compile_count_guard = std::unique_ptr<uint32_t, decltype(&decrement_compil
  compile_count_guard acquire_compile_slot() {
      // wait until fewer than N compiles are in progress.
      // 16 is an arbitrary limit, the goal is to avoid "failed to create pipe" errors.
-    uint32_t N = 16;
+    uint32_t N = std::max(1u, std::min(16u, std::thread::hardware_concurrency()));
      std::unique_lock<std::mutex> guard(compile_count_mutex);
      compile_count_cond.wait(guard, [N] { return compile_count < N; });
      compile_count++;
author	Eve <redacted>
	Sat, 4 Oct 2025 20:04:27 +0000 (20:04 +0000)
committer	Georgi Gerganov <redacted>
	Sun, 12 Oct 2025 08:16:23 +0000 (11:16 +0300)