From: Alexey Parfenov <redacted>
Date: Wed, 16 Oct 2024 08:35:53 +0000 (+0000)
Subject: server : fix the disappearance of the end of the text (#9867)
X-Git-Tag: upstream/0.0.4488~561
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=1f66b699c48cb5ab3265ed72c48e8549b1674291;p=pkg%2Fggml%2Fsources%2Fllama.cpp

server : fix the disappearance of the end of the text (#9867)

* server: fix the disappearance of the end of the text when streaming with stop strings

* simplify "send text" checks
---

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index d53cca84c..b5e63384c 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1090,22 +1090,21 @@ struct server_context {
             size_t pos = std::min(slot.n_sent_text, slot.generated_text.size());
 
             const std::string str_test = slot.generated_text.substr(pos);
-            bool is_stop_full = false;
+            bool send_text = true;
 
             size_t stop_pos = slot.find_stopping_strings(str_test, token_str.size(), STOP_TYPE_FULL);
             if (stop_pos != std::string::npos) {
-                is_stop_full = true;
                 slot.generated_text.erase(
                     slot.generated_text.begin() + pos + stop_pos,
                     slot.generated_text.end());
                 pos = std::min(slot.n_sent_text, slot.generated_text.size());
-            } else {
-                is_stop_full = false;
+            } else if (slot.has_next_token) {
                 stop_pos = slot.find_stopping_strings(str_test, token_str.size(), STOP_TYPE_PARTIAL);
+                send_text = stop_pos == std::string::npos;
             }
 
             // check if there is any token to predict
-            if (stop_pos == std::string::npos || (!slot.has_next_token && !is_stop_full && stop_pos > 0)) {
+            if (send_text) {
                 // no send the stop word in the response
                 result.text_to_send = slot.generated_text.substr(pos, std::string::npos);
                 slot.n_sent_text += result.text_to_send.size();