sampling : make top_n_sigma no-op at <=0 or a single candidate (#13345)

author DocShotgun <redacted>

Tue, 6 May 2025 20:36:24 +0000 (13:36 -0700)

committer GitHub <redacted>

Tue, 6 May 2025 20:36:24 +0000 (22:36 +0200)
author DocShotgun <redacted>
Tue, 6 May 2025 20:36:24 +0000 (13:36 -0700)
committer GitHub <redacted>
Tue, 6 May 2025 20:36:24 +0000 (22:36 +0200)
diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp

index 2869f60d204a148bcd47b622b1a18e00580831cc..804b11e0a943e9625c78516c5da629ec91261968 100644 (file)
--- a/src/llama-sampling.cpp
+++ b/src/llama-sampling.cpp
@@ -1750,7 +1750,7 @@ static const char * llama_sampler_top_n_sigma_name(const struct llama_sampler *
  static void llama_sampler_top_n_sigma_apply(struct llama_sampler * smpl, llama_token_data_array * cur_p) {
      const auto * ctx = (llama_sampler_top_n_sigma *) smpl->ctx;
  
-    if (ctx->n < 0.0f) {
+    if (ctx->n <= 0.0f || cur_p->size <= 1) {
          return;
      }
  
diff --git a/tests/test-sampling.cpp b/tests/test-sampling.cpp

index f1f87d454d464afd8992661a490343dc6e949844..60ac62b385f352876295b23bbd257ce216b9a9ee 100644 (file)
--- a/tests/test-sampling.cpp
+++ b/tests/test-sampling.cpp
@@ -360,7 +360,7 @@ int main(void) {
      test_dry({0.2f, 0.2f, 0.2f, 0.2f, 0.2f}, {0, 1, 2, 3, 4, 0, 1}, {0.2f, 0.2f, 0.2f, 0.2f, 0.2f}, 1.0f, 1.1f, 4, 7, {});
  
      test_top_n_sigma({0.1f, 0.2f, 0.3f, 0.4f}, {0.571429f, 0.428571f, 0.0f, 0.0f}, 1.00f);
-    test_top_n_sigma({0.1f, 0.2f, 0.3f, 0.4f}, {1.0f, 0.0f, 0.0f, 0.0f}, 0.00f);
+    test_top_n_sigma({0.1f, 0.2f, 0.3f, 0.4f}, {0.4f, 0.3f, 0.2f, 0.1f}, 0.00f); // top_n_sigma == 0 now represents a no-op rather than greedy decoding as of PR#13345
      test_top_n_sigma({0.1f, 0.2f, 0.3f, 0.4f}, {0.4f, 0.3f, 0.2f, 0.1f}, 3.00f);
  
      test_sampler_queue(10000, "k", 10000, 1.0f, 1.0f);
author	DocShotgun <redacted>
	Tue, 6 May 2025 20:36:24 +0000 (13:36 -0700)
committer	GitHub <redacted>
	Tue, 6 May 2025 20:36:24 +0000 (22:36 +0200)
src/llama-sampling.cpp		patch | blob | history
tests/test-sampling.cpp		patch | blob | history