Immediately start processing the prompt before user input has been provided (#476)

author Georgi Gerganov <redacted>

Fri, 24 Mar 2023 21:17:58 +0000 (23:17 +0200)

committer GitHub <redacted>

Fri, 24 Mar 2023 21:17:58 +0000 (23:17 +0200)
author Georgi Gerganov <redacted>
Fri, 24 Mar 2023 21:17:58 +0000 (23:17 +0200)
committer GitHub <redacted>
Fri, 24 Mar 2023 21:17:58 +0000 (23:17 +0200)
diff --git a/alpaca.sh b/alpaca.sh

index 2f36d6f54a82d2dfeeefeb53bc981421a2e8ca67..d8a9f456ab287ea4aef383dd6fb2d5ccbe9476e6 100755 (executable)
--- a/alpaca.sh
+++ b/alpaca.sh
@@ -3,4 +3,4 @@
  # Temporary script - will be removed in the future
  #
  
-./main -m ./models/ggml-alpaca-7b-q4.bin --color -f ./prompts/alpaca.txt -ins --top_k 10000 --temp 0.2 --repeat_penalty 1 -t 7
+./main -m ./models/ggml-alpaca-7b-q4.bin --color -f ./prompts/alpaca.txt -ins -b 256 --top_k 10000 --temp 0.2 --repeat_penalty 1 -t 7
diff --git a/chat.sh b/chat.sh

index 24a0f10ad306b8fc93f86b77072c422d9177cd6e..5531315b382bc35f467e0e9d86c250f74b637fc0 100755 (executable)
--- a/chat.sh
+++ b/chat.sh
@@ -3,4 +3,4 @@
  # Temporary script - will be removed in the future
  #
  
-./main -m ./models/7B/ggml-model-q4_0.bin -n 256 --repeat_penalty 1.0 --color -i -r "User:" -f prompts/chat-with-bob.txt
+./main -m ./models/7B/ggml-model-q4_0.bin -b 128 -n 256 --repeat_penalty 1.0 --color -i -r "User:" -f prompts/chat-with-bob.txt
diff --git a/examples/chatLLaMa b/examples/chatLLaMa

index 97ababbc571e256e78aee7935a0bf6eb39354ef3..4265d7b66242706f82374d90ab8066c5e259a809 100755 (executable)
--- a/examples/chatLLaMa
+++ b/examples/chatLLaMa
@@ -13,7 +13,7 @@ N_PREDICTS="${N_PREDICTS:-2048}"
  
  # Note: you can also override the generation options by specifying them on the command line:
  # For example, override the context size by doing: ./chatLLaMa --ctx_size 1024
-GEN_OPTIONS="${GEN_OPTIONS:---ctx_size 2048 --temp 0.7 --top_k 40 --top_p 0.5 --repeat_last_n 256 --repeat_penalty 1.17647}"
+GEN_OPTIONS="${GEN_OPTIONS:---ctx_size 2048 --temp 0.7 --top_k 40 --top_p 0.5 --repeat_last_n 256 --batch_size 1024 --repeat_penalty 1.17647}"
  
  # shellcheck disable=SC2086 # Intended splitting of GEN_OPTIONS
  ./main $GEN_OPTIONS \
diff --git a/main.cpp b/main.cpp

index bc71a5494b2317909c911b1479a0bb7bbea50abb..3f49ad9970357cc190199832cecc8a330deba98f 100644 (file)
--- a/main.cpp
+++ b/main.cpp
@@ -372,7 +372,7 @@ int main(int argc, char ** argv) {
          n_past += embd.size();
          embd.clear();
  
-        if ((int) embd_inp.size() <= input_consumed) {
+        if ((int) embd_inp.size() <= input_consumed && !is_interacting) {
              // out of user input, sample next token
              const float top_k          = params.top_k;
              const float top_p          = params.top_p;
@@ -451,13 +451,16 @@ int main(int argc, char ** argv) {
              }
  
              // Check if each of the reverse prompts appears at the end of the output.
-            for (std::string antiprompt : params.antiprompt) {
+            for (std::string & antiprompt : params.antiprompt) {
                  if (last_output.find(antiprompt.c_str(), last_output.length() - antiprompt.length(), antiprompt.length()) != std::string::npos) {
                      is_interacting = true;
+                    set_console_state(CONSOLE_STATE_USER_INPUT);
+                    fflush(stdout);
                      break;
                  }
              }
-            if (is_interacting) {
+
+            if (n_past > 0 && is_interacting) {
                  // potentially set color to indicate we are taking user input
                  set_console_state(CONSOLE_STATE_USER_INPUT);
  
@@ -495,7 +498,10 @@ int main(int argc, char ** argv) {
  
                  input_noecho = true; // do not echo this again
              }
-            is_interacting = false;
+
+            if (n_past > 0) {
+                is_interacting = false;
+            }
          }
  
          // end of text token
author	Georgi Gerganov <redacted>
	Fri, 24 Mar 2023 21:17:58 +0000 (23:17 +0200)
committer	GitHub <redacted>
	Fri, 24 Mar 2023 21:17:58 +0000 (23:17 +0200)
alpaca.sh		patch | blob | history
chat.sh		patch | blob | history
examples/chatLLaMa		patch | blob | history
main.cpp		patch | blob | history