 while (llama.has_next_token) {
     const completion_token_output token_with_probs = llama.doCompletion();
-    const std::string token_text = llama_token_to_str(llama.ctx, token_with_probs.tok);
+    const std::string token_text = token_with_probs.tok == -1 ? "" : llama_token_to_str(llama.ctx, token_with_probs.tok);
     stop_pos = llama.findStoppingStrings(llama.generated_text,
         token_text.size(), STOP_FULL);
 while (llama.has_next_token) {
     const completion_token_output token_with_probs = llama.doCompletion();
-    const std::string token_text = llama_token_to_str(llama.ctx, token_with_probs.tok);
+    const std::string token_text = token_with_probs.tok == -1 ? "" : llama_token_to_str(llama.ctx, token_with_probs.tok);
     if (llama.multibyte_pending > 0) {
         continue;
     }
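Both hunks make the same change: `doCompletion()` can yield a `completion_token_output` whose `tok` is `-1` (no token produced), and passing `-1` on to `llama_token_to_str` is invalid, so the patch maps that case to an empty string before the stop-string and multibyte checks run. Below is a minimal self-contained sketch of the guard; `llama_token_to_str` is stubbed out here since the real one comes from llama.cpp, and the helper name `token_to_text` is hypothetical:

```cpp
#include <iostream>
#include <string>

// Stand-in for llama.cpp's llama_token_to_str(); the real version looks
// the token id up in the model's vocabulary.
static std::string llama_token_to_str(int token) {
    return "<token " + std::to_string(token) + ">";
}

// The guard from the patch, factored into a hypothetical helper: a token
// id of -1 means "no token was produced", so return an empty string
// instead of converting an invalid id.
static std::string token_to_text(int token) {
    return token == -1 ? "" : llama_token_to_str(token);
}

int main() {
    std::cout << token_to_text(42) << "\n";  // prints "<token 42>"
    std::cout << token_to_text(-1) << "\n";  // prints nothing: guard hit
}
```

Returning an empty string rather than branching at every call site keeps the downstream logic unchanged; in particular, the `token_text.size()` argument to `findStoppingStrings` simply becomes a zero-length window.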