main: add the possibility to open the prompt cache read-only (#1640)

author Willy Tarreau <redacted>

Wed, 7 Jun 2023 02:10:17 +0000 (04:10 +0200)

committer GitHub <redacted>

Wed, 7 Jun 2023 02:10:17 +0000 (22:10 -0400)
author Willy Tarreau <redacted>
Wed, 7 Jun 2023 02:10:17 +0000 (04:10 +0200)
committer GitHub <redacted>
Wed, 7 Jun 2023 02:10:17 +0000 (22:10 -0400)
diff --git a/examples/common.cpp b/examples/common.cpp

index c373462146b8ec155c8168733e58e4f57109c213..f5d886acf653931290e35b13d8d313a85c2dcff3 100644 (file)
--- a/examples/common.cpp
+++ b/examples/common.cpp
@@ -132,6 +132,8 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
              params.path_prompt_cache = argv[i];
          } else if (arg == "--prompt-cache-all") {
              params.prompt_cache_all = true;
+        } else if (arg == "--prompt-cache-ro") {
+            params.prompt_cache_ro = true;
          } else if (arg == "-f" || arg == "--file") {
              if (++i >= argc) {
                  invalid_param = true;
@@ -432,6 +434,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
      fprintf(stderr, "  --prompt-cache FNAME  file to cache prompt state for faster startup (default: none)\n");
      fprintf(stderr, "  --prompt-cache-all    if specified, saves user input and generations to cache as well.\n");
      fprintf(stderr, "                        not supported with --interactive or other interactive options\n");
+    fprintf(stderr, "  --prompt-cache-ro     if specified, uses the prompt cache but does not update it.\n");
      fprintf(stderr, "  --random-prompt       start with a randomized prompt.\n");
      fprintf(stderr, "  --in-prefix STRING    string to prefix user inputs with (default: empty)\n");
      fprintf(stderr, "  --in-suffix STRING    string to suffix after user inputs with (default: empty)\n");
diff --git a/examples/common.h b/examples/common.h

index 12b497349fee5ac0b6539ecc762b176bbd3f0277..826e2ae59cec1ad93b3b2e28692415b5e8f5c850 100644 (file)
--- a/examples/common.h
+++ b/examples/common.h
@@ -62,6 +62,7 @@ struct gpt_params {
      bool use_color         = false; // use color to distinguish generations and inputs
      bool interactive       = false; // interactive mode
      bool prompt_cache_all  = false; // save user input and generations to prompt cache
+    bool prompt_cache_ro   = false; // open the prompt cache read-only and do not update it
  
      bool embedding         = false; // get only sentence embedding
      bool interactive_first = false; // wait for user input immediately
diff --git a/examples/main/main.cpp b/examples/main/main.cpp

index b4d129393255da8d3a079eedda58c0e214a12133..de63faa3eea767d1e9ffa7f6580877e93768ddc0 100644 (file)
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -417,7 +417,7 @@ int main(int argc, char ** argv) {
              const bool    penalize_nl     = params.penalize_nl;
  
              // optionally save the session on first sample (for faster prompt loading next time)
-            if (!path_session.empty() && need_to_save_session) {
+            if (!path_session.empty() && need_to_save_session && !params.prompt_cache_ro) {
                  need_to_save_session = false;
                  llama_save_session_file(ctx, path_session.c_str(), session_tokens.data(), session_tokens.size());
              }
@@ -630,7 +630,7 @@ int main(int argc, char ** argv) {
          }
      }
  
-    if (!path_session.empty() && params.prompt_cache_all) {
+    if (!path_session.empty() && params.prompt_cache_all && !params.prompt_cache_ro) {
          fprintf(stderr, "\n%s: saving final output to session file '%s'\n", __func__, path_session.c_str());
          llama_save_session_file(ctx, path_session.c_str(), session_tokens.data(), session_tokens.size());
      }
author	Willy Tarreau <redacted>
	Wed, 7 Jun 2023 02:10:17 +0000 (04:10 +0200)
committer	GitHub <redacted>
	Wed, 7 Jun 2023 02:10:17 +0000 (22:10 -0400)
examples/common.cpp		patch | blob | history
examples/common.h		patch | blob | history
examples/main/main.cpp		patch | blob | history