git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
Respect the maximum number of tokens in interactive. (#298)
author tjohnman <redacted>
Sun, 19 Mar 2023 18:31:17 +0000 (19:31 +0100)
committer GitHub <redacted>
Sun, 19 Mar 2023 18:31:17 +0000 (20:31 +0200)
Co-authored-by: Johnman <redacted>
Co-authored-by: Georgi Gerganov <redacted>
main.cpp

index e18105624d282515757c0bdf49d246fdde2ee9be..57e924950048d45ae7ad3a2ab605d90a5a855ace 100644 (file)
--- a/main.cpp
+++ b/main.cpp
@@ -1062,7 +1062,6 @@ int main(int argc, char ** argv) {
         }
 
         // end of text token
-
         if (embd.back() == EOS_TOKEN_ID) {
             if (params.interactive) {
                 is_interacting = true;
@@ -1071,6 +1070,12 @@ int main(int argc, char ** argv) {
                 break;
             }
         }
+
+        // In interactive mode, respect the maximum number of tokens and drop back to user input when reached.
+        if (params.interactive && remaining_tokens <= 0) {
+            remaining_tokens = params.n_predict;
+            is_interacting = true;
+        }
     }
 
 #if defined (_WIN32)