From: the-crypt-keeper Date: Mon, 10 Jul 2023 18:41:58 +0000 (-0400) Subject: starcoder : add <|end_of_turn|> token handling in order to support openchat/opencoder... X-Git-Tag: upstream/0.0.1642~1341 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=d40d7ff57bd1ac27eec1a3bf1ae46e428a038252;p=pkg%2Fggml%2Fsources%2Fggml starcoder : add <|end_of_turn|> token handling in order to support openchat/opencoderplus (#343) * Add <|end_of_turn|> token handling to support openchat/opencoderplus * The opencoder EOT occurs inside the prompt, so we should only break if the model actually generated it --------- Co-authored-by: Mike --- diff --git a/examples/starcoder/main.cpp b/examples/starcoder/main.cpp index c5007304..d84e3663 100644 --- a/examples/starcoder/main.cpp +++ b/examples/starcoder/main.cpp @@ -153,7 +153,8 @@ bool starcoder_model_load(const std::string & fname, starcoder_model & model, gp "", "", "", - "" + "", + "<|end_of_turn|>" }) { if (vocab.token_to_id.find(token) != vocab.token_to_id.end()) { vocab.add_special_token(token); @@ -813,12 +814,17 @@ int main(int argc, char ** argv) { } printf("\n\n"); - // Handle StarChat "<|end|>" token. + // Handle StarChat "<|end|>" and OpenCoder "<|end_of_turn|>" tokens. gpt_vocab::id starchat_end_token = -1; { const auto it = vocab.token_to_id.find("<|end|>"); if (it != vocab.token_to_id.end()) { starchat_end_token = it->second; + } else { + const auto eot_token_id = vocab.token_to_id.find("<|end_of_turn|>"); + if (eot_token_id != vocab.token_to_id.end()) { + starchat_end_token = eot_token_id->second; + } } } @@ -898,7 +904,7 @@ int main(int argc, char ** argv) { break; } // Handle StarChat "<|end|>" token. - else if (embd.back() == starchat_end_token) { + else if (embd.back() == starchat_end_token && i >= embd_inp.size()) { break; } }