));
add_opt(common_arg(
{"--prio"}, "N",
- string_format("set process/thread priority : 0-normal, 1-medium, 2-high, 3-realtime (default: %d)\n", params.cpuparams.priority),
+ string_format("set process/thread priority : low(-1), normal(0), medium(1), high(2), realtime(3) (default: %d)\n", params.cpuparams.priority),
[](common_params & params, int prio) {
- if (prio < 0 || prio > 3) {
+ if (prio < GGML_SCHED_PRIO_LOW || prio > GGML_SCHED_PRIO_REALTIME) {
throw std::invalid_argument("invalid value");
}
params.cpuparams.priority = (enum ggml_sched_priority) prio;
DWORD p = NORMAL_PRIORITY_CLASS;
switch (prio) {
+ case GGML_SCHED_PRIO_LOW: p = BELOW_NORMAL_PRIORITY_CLASS; break;
case GGML_SCHED_PRIO_NORMAL: p = NORMAL_PRIORITY_CLASS; break;
case GGML_SCHED_PRIO_MEDIUM: p = ABOVE_NORMAL_PRIORITY_CLASS; break;
case GGML_SCHED_PRIO_HIGH: p = HIGH_PRIORITY_CLASS; break;
int p = 0;
switch (prio) {
+ case GGML_SCHED_PRIO_LOW: p = 5; break;
case GGML_SCHED_PRIO_NORMAL: p = 0; break;
case GGML_SCHED_PRIO_MEDIUM: p = -5; break;
case GGML_SCHED_PRIO_HIGH: p = -10; break;
// This is up to the applications.
DWORD p = THREAD_PRIORITY_NORMAL;
switch (prio) {
+ case GGML_SCHED_PRIO_LOW: p = THREAD_PRIORITY_BELOW_NORMAL; break;
case GGML_SCHED_PRIO_NORMAL: p = THREAD_PRIORITY_NORMAL; break;
case GGML_SCHED_PRIO_MEDIUM: p = THREAD_PRIORITY_ABOVE_NORMAL; break;
case GGML_SCHED_PRIO_HIGH: p = THREAD_PRIORITY_HIGHEST; break;
case GGML_SCHED_PRIO_REALTIME: p = THREAD_PRIORITY_TIME_CRITICAL; break;
}
+ if (prio != GGML_SCHED_PRIO_LOW) {
+ // Tell Windows that this thread should not be throttled (needs its own CPU core).
+ // Newer Windows 11 versions aggressively park (offline) CPU cores and often place
+ // all our threads onto the first 4 cores which results in terrible performance with
+ // n_threads > 4
+ #if _WIN32_WINNT >= 0x0602
+ THREAD_POWER_THROTTLING_STATE t;
+ ZeroMemory(&t, sizeof(t));
+ t.Version = THREAD_POWER_THROTTLING_CURRENT_VERSION;
+ t.ControlMask = THREAD_POWER_THROTTLING_EXECUTION_SPEED;
+ t.StateMask = 0;
+
+ if (!SetThreadInformation(GetCurrentThread(), ThreadPowerThrottling, &t, sizeof(t))) {
+ GGML_LOG_DEBUG("failed to disable thread power throttling %d : (%d)\n", prio, (int) GetLastError());
+ return false;
+ }
+ #endif
+ }
+
if (prio == GGML_SCHED_PRIO_NORMAL) {
// Keep inherited policy/priority
return true;
struct sched_param p;
int32_t policy = SCHED_OTHER;
switch (prio) {
+ // TODO: there seems to be no way to set lower prio on Apple platforms
+ case GGML_SCHED_PRIO_LOW: policy = SCHED_OTHER; p.sched_priority = 0; break;
case GGML_SCHED_PRIO_NORMAL: policy = SCHED_OTHER; p.sched_priority = 0; break;
case GGML_SCHED_PRIO_MEDIUM: policy = SCHED_FIFO; p.sched_priority = 40; break;
case GGML_SCHED_PRIO_HIGH: policy = SCHED_FIFO; p.sched_priority = 80; break;
struct sched_param p;
int32_t policy = SCHED_OTHER;
switch (prio) {
+ case GGML_SCHED_PRIO_LOW: policy = SCHED_BATCH; p.sched_priority = 0; break;
case GGML_SCHED_PRIO_NORMAL: policy = SCHED_OTHER; p.sched_priority = 0; break;
case GGML_SCHED_PRIO_MEDIUM: policy = SCHED_FIFO; p.sched_priority = 40; break;
case GGML_SCHED_PRIO_HIGH: policy = SCHED_FIFO; p.sched_priority = 80; break;
printf(" --numa <distribute|isolate|numactl> numa mode (default: disabled)\n");
printf(" -r, --repetitions <n> number of times to repeat each test (default: %d)\n",
cmd_params_defaults.reps);
- printf(" --prio <0|1|2|3> process/thread priority (default: %d)\n",
+ printf(" --prio <-1|0|1|2|3> process/thread priority (default: %d)\n",
cmd_params_defaults.prio);
printf(" --delay <0...N> (seconds) delay between each test (default: %d)\n",
cmd_params_defaults.delay);