arg : option to exclude arguments from specific examples (#11136)

author Georgi Gerganov <redacted>

Wed, 8 Jan 2025 10:55:36 +0000 (12:55 +0200)

committer GitHub <redacted>

Wed, 8 Jan 2025 10:55:36 +0000 (12:55 +0200)
author Georgi Gerganov <redacted>
Wed, 8 Jan 2025 10:55:36 +0000 (12:55 +0200)
committer GitHub <redacted>
Wed, 8 Jan 2025 10:55:36 +0000 (12:55 +0200)
diff --git a/common/arg.cpp b/common/arg.cpp

index c81b1521732c2a5830f29cf6a81550e526d6e439..27886b84e862c4049dd208fd7f3908e447f67847 100644 (file)
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -22,6 +22,11 @@ common_arg & common_arg::set_examples(std::initializer_list<enum llama_example>
      return *this;
  }
  
+common_arg & common_arg::set_excludes(std::initializer_list<enum llama_example> excludes) {
+    this->excludes = std::move(excludes);
+    return *this;
+}
+
  common_arg & common_arg::set_env(const char * env) {
      help = help + "\n(env: " + env + ")";
      this->env = env;
@@ -37,6 +42,10 @@ bool common_arg::in_example(enum llama_example ex) {
      return examples.find(ex) != examples.end();
  }
  
+bool common_arg::is_exclude(enum llama_example ex) {
+    return excludes.find(ex) != excludes.end();
+}
+
  bool common_arg::get_value_from_env(std::string & output) {
      if (env == nullptr) return false;
      char * value = std::getenv(env);
@@ -420,7 +429,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
       * - if both {LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_*,} are set, we will prioritize the LLAMA_EXAMPLE_* matching current example
       */
      auto add_opt = [&](common_arg arg) {
-        if (arg.in_example(ex) || arg.in_example(LLAMA_EXAMPLE_COMMON)) {
+        if ((arg.in_example(ex) || arg.in_example(LLAMA_EXAMPLE_COMMON)) && !arg.is_exclude(ex)) {
              ctx_arg.options.push_back(std::move(arg));
          }
      };
@@ -649,7 +658,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
          [](common_params & params, const std::string & value) {
              params.prompt = value;
          }
-    ));
+    ).set_excludes({LLAMA_EXAMPLE_SERVER}));
      add_opt(common_arg(
          {"--no-perf"},
          string_format("disable internal libllama performance timings (default: %s)", params.no_perf ? "true" : "false"),
@@ -673,7 +682,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
                  params.prompt.pop_back();
              }
          }
-    ));
+    ).set_excludes({LLAMA_EXAMPLE_SERVER}));
      add_opt(common_arg(
          {"--in-file"}, "FNAME",
          "an input file (repeat to specify multiple files)",
@@ -700,7 +709,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
              params.prompt = ss.str();
              fprintf(stderr, "Read %zu bytes from binary file %s\n", params.prompt.size(), value.c_str());
          }
-    ));
+    ).set_excludes({LLAMA_EXAMPLE_SERVER}));
      add_opt(common_arg(
          {"-e", "--escape"},
          string_format("process escapes sequences (\\n, \\r, \\t, \\', \\\", \\\\) (default: %s)", params.escape ? "true" : "false"),
diff --git a/common/arg.h b/common/arg.h

index a6700d323cc14b54b4ef5b2445d29e4baec19594..49ab8667b10527c05d1c6aed3593ad78170518c0 100644 (file)
--- a/common/arg.h
+++ b/common/arg.h
@@ -12,6 +12,7 @@
  
  struct common_arg {
      std::set<enum llama_example> examples = {LLAMA_EXAMPLE_COMMON};
+    std::set<enum llama_example> excludes = {};
      std::vector<const char *> args;
      const char * value_hint   = nullptr; // help text or example for arg value
      const char * value_hint_2 = nullptr; // for second arg value
@@ -53,9 +54,11 @@ struct common_arg {
      ) : args(args), value_hint(value_hint), value_hint_2(value_hint_2), help(help), handler_str_str(handler) {}
  
      common_arg & set_examples(std::initializer_list<enum llama_example> examples);
+    common_arg & set_excludes(std::initializer_list<enum llama_example> excludes);
      common_arg & set_env(const char * env);
      common_arg & set_sparam();
      bool in_example(enum llama_example ex);
+    bool is_exclude(enum llama_example ex);
      bool get_value_from_env(std::string & output);
      bool has_value_from_env();
      std::string to_string();
diff --git a/examples/server/README.md b/examples/server/README.md

index 3ce16945ac8072d06ad16b60e6708f1c65fe89b7..1f0a27d967d507658a456d88de9988e746ebf98a 100644 (file)
--- a/examples/server/README.md
+++ b/examples/server/README.md
@@ -45,10 +45,7 @@ The project is under active development, and we are [looking for feedback and co
  | `-ub, --ubatch-size N` | physical maximum batch size (default: 512)<br/>(env: LLAMA_ARG_UBATCH) |
  | `--keep N` | number of tokens to keep from the initial prompt (default: 0, -1 = all) |
  | `-fa, --flash-attn` | enable Flash Attention (default: disabled)<br/>(env: LLAMA_ARG_FLASH_ATTN) |
-| `-p, --prompt PROMPT` | prompt to start generation with |
  | `--no-perf` | disable internal libllama performance timings (default: false)<br/>(env: LLAMA_ARG_NO_PERF) |
-| `-f, --file FNAME` | a file containing the prompt (default: none) |
-| `-bf, --binary-file FNAME` | binary file containing the prompt (default: none) |
  | `-e, --escape` | process escapes sequences (\n, \r, \t, \', \", \\) (default: true) |
  | `--no-escape` | do not process escape sequences |
  | `--rope-scaling {none,linear,yarn}` | RoPE frequency scaling method, defaults to linear unless specified by the model<br/>(env: LLAMA_ARG_ROPE_SCALING_TYPE) |
author	Georgi Gerganov <redacted>
	Wed, 8 Jan 2025 10:55:36 +0000 (12:55 +0200)
committer	GitHub <redacted>
	Wed, 8 Jan 2025 10:55:36 +0000 (12:55 +0200)
common/arg.cpp		patch \| blob \| history
common/arg.h		patch \| blob \| history
examples/server/README.md		patch \| blob \| history