server : fix handling of the ignore_eos flag (#14710)

author Georgi Gerganov <redacted>

Wed, 16 Jul 2025 09:13:57 +0000 (12:13 +0300)

committer GitHub <redacted>

Wed, 16 Jul 2025 09:13:57 +0000 (12:13 +0300)
author Georgi Gerganov <redacted>
Wed, 16 Jul 2025 09:13:57 +0000 (12:13 +0300)
committer GitHub <redacted>
Wed, 16 Jul 2025 09:13:57 +0000 (12:13 +0300)
diff --git a/tools/server/server.cpp b/tools/server/server.cpp

index d4dffb39c8d16a333dcc75e3657639d08e49c893..1e7d64a2852375ab0b8ea53c59a1f6cd34604af8 100644 (file)
--- a/tools/server/server.cpp
+++ b/tools/server/server.cpp
@@ -127,7 +127,6 @@ struct slot_params {
      std::vector<std::string> response_fields;
      bool timings_per_token = false;
      bool post_sampling_probs = false;
-    bool ignore_eos = false;
  
      struct common_params_sampling sampling;
      struct common_params_speculative speculative;
@@ -441,7 +440,6 @@ struct server_task {
  
          {
              params.sampling.logit_bias.clear();
-            params.ignore_eos = json_value(data, "ignore_eos", false);
  
              const auto & logit_bias = data.find("logit_bias");
              if (logit_bias != data.end() && logit_bias->is_array()) {
@@ -472,6 +470,16 @@ struct server_task {
                      }
                  }
              }
+
+            params.sampling.ignore_eos = json_value(data, "ignore_eos", params_base.sampling.ignore_eos);
+            if (params.sampling.ignore_eos) {
+                for (llama_token i = 0; i < llama_vocab_n_tokens(vocab); i++) {
+                    if (llama_vocab_is_eog(vocab, i)) {
+                        //SRV_DBG("%s: added %s logit bias = %f\n", __func__, common_token_to_piece(ctx, i).c_str(), -INFINITY);
+                        params.sampling.logit_bias.push_back({i, -INFINITY});
+                    }
+                }
+            }
          }
  
          {
@@ -2217,10 +2225,6 @@ struct server_context {
              slot.params.n_predict = slot.n_predict;
          }
  
-        if (slot.params.ignore_eos && has_eos_token) {
-            slot.params.sampling.logit_bias.push_back({llama_vocab_eos(vocab), -INFINITY});
-        }
-
          {
              if (slot.smpl != nullptr) {
                  common_sampler_free(slot.smpl);
author	Georgi Gerganov <redacted>
	Wed, 16 Jul 2025 09:13:57 +0000 (12:13 +0300)
committer	GitHub <redacted>
	Wed, 16 Jul 2025 09:13:57 +0000 (12:13 +0300)