git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
sampling : when top-k <= 0 -> noop (#13173)
author: Georgi Gerganov <redacted>
Tue, 29 Apr 2025 17:22:57 +0000 (20:22 +0300)
committer: GitHub <redacted>
Tue, 29 Apr 2025 17:22:57 +0000 (20:22 +0300)
ggml-ci

include/llama.h
src/llama-sampling.cpp

index a13350e15be6aaf3029fb9cce9b74caf01c31b18..06c56395c139fb8cc936542cae2a124c16cfba71 100644 (file)
@@ -1232,6 +1232,7 @@ extern "C" {
         "will be removed in the future (see https://github.com/ggml-org/llama.cpp/pull/9896#discussion_r1800920915)");
 
     /// @details Top-K sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751
+    /// Setting k <= 0 makes this a noop
     LLAMA_API struct llama_sampler * llama_sampler_init_top_k      (int32_t k);
 
     /// @details Nucleus sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751
index d14979850285dbebe5320fb119d7e7736b16c11a..c0a5f9340d5851beade2deb00ef41bea2e38bc7e 100644 (file)
@@ -232,7 +232,7 @@ static void llama_sampler_top_k_impl(llama_token_data_array * cur_p, int32_t k)
     // }
 
     if (k <= 0) {
-        k = cur_p->size;
+        return;
     }
 
     k = std::min(k, (int) cur_p->size);
@@ -298,6 +298,7 @@ static void llama_sampler_top_k_impl(llama_token_data_array * cur_p, int32_t k)
         }
         cur_p->sorted = true;
     }
+
     cur_p->size = k;
 }