Replace EOS with newline to prevent context/memory being flushed by EOS in interactive mode

author rabidcopy <redacted>

Thu, 23 Mar 2023 20:22:47 +0000 (15:22 -0500)

committer GitHub <redacted>

Thu, 23 Mar 2023 20:22:47 +0000 (22:22 +0200)
author rabidcopy <redacted>
Thu, 23 Mar 2023 20:22:47 +0000 (15:22 -0500)
committer GitHub <redacted>
Thu, 23 Mar 2023 20:22:47 +0000 (22:22 +0200)
diff --git a/main.cpp b/main.cpp

index 431c94b52ebc5d8f024cf452eb87140a178550c7..5ba6d5a7561dcff65732b8737b94605961a38994 100644 (file)
--- a/main.cpp
+++ b/main.cpp
@@ -258,6 +258,9 @@ int main(int argc, char ** argv) {
          params.interactive = true;
      }
  
+    // determine newline token
+    auto llama_token_newline = ::llama_tokenize(ctx, "\n", false);
+
      fprintf(stderr, "\n");
      fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str());
      fprintf(stderr, "%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
@@ -359,6 +362,16 @@ int main(int argc, char ** argv) {
                  last_n_tokens.push_back(id);
              }
  
+            // replace end of text token with newline token when in interactive mode
+            if (id == llama_token_eos() && params.interactive) {
+                id = llama_token_newline.front();
+                if (params.antiprompt.size() != 0) {
+                    // tokenize and inject first reverse prompt
+                    const auto first_antiprompt = ::llama_tokenize(ctx, params.antiprompt.front(), false);
+                    embd_inp.insert(embd_inp.end(), first_antiprompt.begin(), first_antiprompt.end());
+                }
+            }
+
              // add it to the context
              embd.push_back(id);
  
@@ -451,12 +464,8 @@ int main(int argc, char ** argv) {
  
          // end of text token
          if (embd.back() == llama_token_eos()) {
-            if (params.interactive) {
-                is_interacting = true;
-            } else {
-                fprintf(stderr, " [end of text]\n");
-                break;
-            }
+            fprintf(stderr, " [end of text]\n");
+            break;
          }
  
          // In interactive mode, respect the maximum number of tokens and drop back to user input when reached.
author	rabidcopy <redacted>
	Thu, 23 Mar 2023 20:22:47 +0000 (15:22 -0500)
committer	GitHub <redacted>
	Thu, 23 Mar 2023 20:22:47 +0000 (22:22 +0200)