main : add parameter --no-display-prompt (#4541)

author Yann Follet <redacted>

Sat, 13 Jan 2024 16:09:08 +0000 (00:09 +0800)

committer GitHub <redacted>

Sat, 13 Jan 2024 16:09:08 +0000 (18:09 +0200)
author Yann Follet <redacted>
Sat, 13 Jan 2024 16:09:08 +0000 (00:09 +0800)
committer GitHub <redacted>
Sat, 13 Jan 2024 16:09:08 +0000 (18:09 +0200)
diff --git a/common/common.cpp b/common/common.cpp

index 322b9f91e504149ef1a48da4caa5d42a552a86ac..c11006bcb91755a5518098a7dd029ceaadb0e874 100644 (file)
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -617,6 +617,8 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
              params.numa = true;
          } else if (arg == "--verbose-prompt") {
              params.verbose_prompt = true;
+        } else if (arg == "--no-display-prompt") {
+            params.display_prompt = false;
          } else if (arg == "-r" || arg == "--reverse-prompt") {
              if (++i >= argc) {
                  invalid_param = true;
@@ -936,11 +938,12 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
      printf("  -mg i, --main-gpu i   the GPU to use for the model (with split-mode = none),\n");
      printf("                        or for intermediate results and KV (with split-mode = row) (default: %d)\n", params.main_gpu);
  #endif
+    printf("  --verbose-prompt      print a verbose prompt before generation (default: %s)\n", params.verbose_prompt ? "true" : "false");
+    printf("  --no-display-prompt   don't print prompt at generation (default: %s)\n", !params.display_prompt ? "true" : "false");
      printf("  -gan N, --grp-attn-n N\n");
      printf("                        group-attention factor (default: %d)\n", params.grp_attn_n);
      printf("  -gaw N, --grp-attn-w N\n");
      printf("                        group-attention width (default: %.1f)\n", (double)params.grp_attn_w);
-    printf("  --verbose-prompt      print prompt before generation\n");
      printf("  -dkvc, --dump-kv-cache\n");
      printf("                        verbose print of the KV cache\n");
      printf("  -nkvo, --no-kv-offload\n");
@@ -1582,6 +1585,7 @@ void dump_non_result_info_yaml(FILE * stream, const gpt_params & params, const l
      fprintf(stream, "min_p: %f # default: 0.0\n", sparams.min_p);
      fprintf(stream, "typical_p: %f # default: 1.0\n", sparams.typical_p);
      fprintf(stream, "verbose_prompt: %s # default: false\n", params.verbose_prompt ? "true" : "false");
+    fprintf(stream, "display_prompt: %s # default: true\n", params.display_prompt ? "true" : "false");
  }
  
  //
diff --git a/common/common.h b/common/common.h

index f29be5b5ab87fd6ebb002d2c6b167ab47e4832ca..096468243d88c1df2b557cc5c59d7a2309f91d22 100644 (file)
--- a/common/common.h
+++ b/common/common.h
@@ -126,6 +126,7 @@ struct gpt_params {
      bool use_mlock         = false; // use mlock to keep model in memory
      bool numa              = false; // attempt optimizations that help on some NUMA systems
      bool verbose_prompt    = false; // print prompt tokens before generation
+    bool display_prompt    = true;  // print prompt before generation
      bool infill            = false; // use infill mode
      bool dump_kv_cache     = false; // dump the KV cache contents for debugging purposes
      bool no_kv_offload     = false; // disable KV offloading
diff --git a/examples/main/main.cpp b/examples/main/main.cpp

index c53b29978657c7e1113e08eb7a1339f29e714bb2..58b7f807a9ccaa856d0659f0dd972fb196b60128 100644 (file)
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -477,6 +477,7 @@ int main(int argc, char ** argv) {
  
      bool is_antiprompt        = false;
      bool input_echo           = true;
+    bool display              = true;
      bool need_to_save_session = !path_session.empty() && n_matching_session_tokens < embd_inp.size();
  
      int n_past             = 0;
@@ -491,6 +492,7 @@ int main(int argc, char ** argv) {
  
      // the first thing we will do is to output the prompt, so set color accordingly
      console::set_display(console::prompt);
+    display = params.display_prompt;
  
      std::vector<llama_token> embd;
      std::vector<llama_token> embd_guidance;
@@ -707,7 +709,7 @@ int main(int argc, char ** argv) {
          }
  
          // display text
-        if (input_echo) {
+        if (input_echo && display) {
              for (auto id : embd) {
                  const std::string token_str = llama_token_to_piece(ctx, id);
                  printf("%s", token_str.c_str());
@@ -724,6 +726,7 @@ int main(int argc, char ** argv) {
          // reset color to default if there is no pending user input
          if (input_echo && (int) embd_inp.size() == n_consumed) {
              console::set_display(console::reset);
+            display = true;
          }
  
          // if not currently processing queued inputs;
@@ -796,6 +799,7 @@ int main(int argc, char ** argv) {
  
                  // color user input only
                  console::set_display(console::user_input);
+                display = params.display_prompt;
  
                  std::string line;
                  bool another_line = true;
@@ -806,6 +810,7 @@ int main(int argc, char ** argv) {
  
                  // done taking input, reset color
                  console::set_display(console::reset);
+                display = true;
  
                  // Add tokens to embd only if the input buffer is non-empty
                  // Entering a empty line lets the user pass control back
author	Yann Follet <redacted>
	Sat, 13 Jan 2024 16:09:08 +0000 (00:09 +0800)
committer	GitHub <redacted>
	Sat, 13 Jan 2024 16:09:08 +0000 (18:09 +0200)
common/common.cpp		patch | blob | history
common/common.h		patch | blob | history
examples/main/main.cpp		patch | blob | history