llama : add --completion-bash option (#11846)
author Daniel Bevenius <redacted>
Thu, 13 Feb 2025 13:46:59 +0000 (14:46 +0100)
committer GitHub <redacted>
Thu, 13 Feb 2025 13:46:59 +0000 (14:46 +0100)
This commit adds a new option `--completion-bash` to llama.cpp which
outputs a source-able bash completion script.

The motivation for this change is to provide a more user-friendly
experience for users who use the command-line interface of llama.cpp.

The completion is currently basic: all options are offered for all llama
executables, but this can be improved in the future if needed.

Example usage:
```console
$ build/bin/llama-cli --completion-bash > ~/.llama-completion.bash
$ source ~/.llama-completion.bash

$ ./build/bin/llama-server --m<TAB>
--main-gpu         --mirostat         --mirostat-lr      --model            --multiline-input
--min-p            --mirostat-ent     --mlock            --model-url
```
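
For reference, the script printed by `--completion-bash` defines a single
completion function and registers it with `complete -F` for each llama
executable. A sketch of its shape, reconstructed from the generator in the
diff below and abridged (the real `opts` string lists every option, and there
is one `complete` line per executable):
```bash
# Abridged sketch of the emitted script.
_llama_completions() {
    local cur prev opts
    COMPREPLY=()
    cur="${COMP_WORDS[COMP_CWORD]}"
    prev="${COMP_WORDS[COMP_CWORD-1]}"

    opts="--model --model-url --main-gpu"   # elided: full option list

    case "$prev" in
        --model)
            # complete .gguf files and directories for --model
            COMPREPLY=( $(compgen -f -X '!*.gguf' -- "$cur") $(compgen -d -- "$cur") )
            return 0
            ;;
        *)
            COMPREPLY=( $(compgen -W "${opts}" -- "$cur") )
            return 0
            ;;
    esac
}

complete -F _llama_completions llama-cli
complete -F _llama_completions llama-server   # elided: one line per executable
```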

README.md
common/arg.cpp
common/common.h

index 43290f2c53a75d914b99646647fe2704ba4ab107..ebbd11a02bdd04451fbb1e494f8a5529de06649f 100644 (file)
--- a/README.md
+++ b/README.md
@@ -521,3 +521,17 @@ If your issue is with model generation quality, then please at least scan the fo
 
 #### References
 
+
+### Completions
+Command-line completion is available for some environments.
+
+#### Bash Completion
+```bash
+$ build/bin/llama-cli --completion-bash > ~/.llama-completion.bash
+$ source ~/.llama-completion.bash
+```
+Optionally this can be added to your `.bashrc` or `.bash_profile` to load it
+automatically. For example:
+```console
+$ echo "source ~/.llama-completion.bash" >> ~/.bashrc
+```
index cda03cb7731fcfb653da205260af7f9ee272d139..144e93fa9b8677ab0f89732ced547c659697f462 100644 (file)
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -365,6 +365,108 @@ static void common_params_print_usage(common_params_context & ctx_arg) {
     print_options(specific_options);
 }
 
+static void common_params_print_completion(common_params_context & ctx_arg) {
+    std::vector<common_arg *> common_options;
+    std::vector<common_arg *> sparam_options;
+    std::vector<common_arg *> specific_options;
+
+    for (auto & opt : ctx_arg.options) {
+        if (opt.is_sparam) {
+            sparam_options.push_back(&opt);
+        } else if (opt.in_example(ctx_arg.ex)) {
+            specific_options.push_back(&opt);
+        } else {
+            common_options.push_back(&opt);
+        }
+    }
+
+    printf("_llama_completions() {\n");
+    printf("    local cur prev opts\n");
+    printf("    COMPREPLY=()\n");
+    printf("    cur=\"${COMP_WORDS[COMP_CWORD]}\"\n");
+    printf("    prev=\"${COMP_WORDS[COMP_CWORD-1]}\"\n\n");
+
+    printf("    opts=\"");
+    auto print_options = [](const std::vector<common_arg *> & options) {
+        for (const common_arg * opt : options) {
+            for (const char * arg : opt->args) {
+                printf("%s ", arg);
+            }
+        }
+    };
+
+    print_options(common_options);
+    print_options(sparam_options);
+    print_options(specific_options);
+    printf("\"\n\n");
+
+    printf("    case \"$prev\" in\n");
+    printf("        --model)\n");
+    printf("            COMPREPLY=( $(compgen -f -X '!*.gguf' -- \"$cur\") $(compgen -d -- \"$cur\") )\n");
+    printf("            return 0\n");
+    printf("            ;;\n");
+    printf("        --grammar-file)\n");
+    printf("            COMPREPLY=( $(compgen -f -X '!*.gbnf' -- \"$cur\") $(compgen -d -- \"$cur\") )\n");
+    printf("            return 0\n");
+    printf("            ;;\n");
+    printf("        *)\n");
+    printf("            COMPREPLY=( $(compgen -W \"${opts}\" -- \"$cur\") )\n");
+    printf("            return 0\n");
+    printf("            ;;\n");
+    printf("    esac\n");
+    printf("}\n\n");
+
+    std::set<std::string> executables = {
+        "llama-batched",
+        "llama-batched-bench",
+        "llama-bench",
+        "llama-cli",
+        "llama-convert-llama2c-to-ggml",
+        "llama-cvector-generator",
+        "llama-embedding",
+        "llama-eval-callback",
+        "llama-export-lora",
+        "llama-gbnf-validator",
+        "llama-gen-docs",
+        "llama-gguf",
+        "llama-gguf-hash",
+        "llama-gguf-split",
+        "llama-gritlm",
+        "llama-imatrix",
+        "llama-infill",
+        "llama-llava-cli",
+        "llama-llava-clip-quantize-cli",
+        "llama-lookahead",
+        "llama-lookup",
+        "llama-lookup-create",
+        "llama-lookup-merge",
+        "llama-lookup-stats",
+        "llama-minicpmv-cli",
+        "llama-parallel",
+        "llama-passkey",
+        "llama-perplexity",
+        "llama-q8dot",
+        "llama-quantize",
+        "llama-quantize-stats",
+        "llama-qwen2vl-cli",
+        "llama-retrieval",
+        "llama-run",
+        "llama-save-load-state",
+        "llama-server",
+        "llama-simple",
+        "llama-simple-chat",
+        "llama-speculative",
+        "llama-speculative-simple",
+        "llama-tokenize",
+        "llama-tts",
+        "llama-vdot"
+    };
+
+    for (const auto& exe : executables) {
+        printf("complete -F _llama_completions %s\n", exe.c_str());
+    }
+}
+
 static std::vector<ggml_backend_dev_t> parse_device_list(const std::string & value) {
     std::vector<ggml_backend_dev_t> devices;
     auto dev_names = string_split<std::string>(value, ',');
@@ -426,6 +528,10 @@ bool common_params_parse(int argc, char ** argv, common_params & params, llama_e
             }
             exit(0);
         }
+        if (ctx_arg.params.completion) {
+            common_params_print_completion(ctx_arg);
+            exit(0);
+        }
     } catch (const std::invalid_argument & ex) {
         fprintf(stderr, "%s\n", ex.what());
         ctx_arg.params = params_org;
@@ -494,6 +600,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             exit(0);
         }
     ));
+    add_opt(common_arg(
+        {"--completion-bash"},
+        "print source-able bash completion script for llama.cpp",
+        [](common_params & params) {
+            params.completion = true;
+        }
+    ));
     add_opt(common_arg(
         {"--verbose-prompt"},
         string_format("print a verbose prompt before generation (default: %s)", params.verbose_prompt ? "true" : "false"),
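
The generated `case "$prev"` block switches on the previous word: for
`--model` it offers only `*.gguf` files plus directories, for `--grammar-file`
only `*.gbnf` files, and otherwise falls back to the flat option list. As a
hypothetical standalone illustration of that `compgen` filtering (the
`models/` path is made up):
```bash
# Hypothetical demo of the filename filtering used for --model:
# compgen -f lists files, -X '!*.gguf' drops anything not ending in .gguf,
# and compgen -d adds directories so completion can descend into them.
cur="models/"
COMPREPLY=( $(compgen -f -X '!*.gguf' -- "$cur") $(compgen -d -- "$cur") )
printf '%s\n' "${COMPREPLY[@]}"
```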
index 5c0f46284afbf8e35d9aaf5d36795f06a9745c83..98b9a4464787a93cb476abed1f332909dcb7fe5a 100644 (file)
--- a/common/common.h
+++ b/common/common.h
@@ -298,6 +298,7 @@ struct common_params {
     bool   kl_divergence    = false; // compute KL divergence
 
     bool usage             = false; // print usage
+    bool completion        = false; // print source-able completion script
     bool use_color         = false; // use color to distinguish generations and inputs
     bool special           = false; // enable special token output
     bool interactive       = false; // interactive mode