common : add llama.vim preset for Qwen2.5 Coder (#11945)

author Daniel Bevenius <redacted>

Wed, 19 Feb 2025 11:29:52 +0000 (12:29 +0100)

committer GitHub <redacted>

Wed, 19 Feb 2025 11:29:52 +0000 (12:29 +0100)
author Daniel Bevenius <redacted>
Wed, 19 Feb 2025 11:29:52 +0000 (12:29 +0100)
committer GitHub <redacted>
Wed, 19 Feb 2025 11:29:52 +0000 (12:29 +0100)
diff --git a/common/arg.cpp b/common/arg.cpp

index eb8beccac2ee7e93bb6558d934a0b9cbde15bb1c..3c169b5b5f48ef5169573c3dbbaafa5be1d4f5ff 100644 (file)
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -2502,5 +2502,53 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
          }
      ).set_examples({LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_SERVER}));
  
+    add_opt(common_arg( // preset flag: Qwen 2.5 Coder 1.5B (Q8_0) configured for FIM use, per the flag name
+        {"--fim-qwen-1.5b-default"},
+        string_format("use default Qwen 2.5 Coder 1.5B (note: can download weights from the internet)"),
+        [](common_params & params) { // handler takes no value; it assigns every field below unconditionally
+            params.hf_repo = "ggml-org/Qwen2.5-Coder-1.5B-Q8_0-GGUF"; // model repo; help text warns the weights may be downloaded
+            params.hf_file = "qwen2.5-coder-1.5b-q8_0.gguf"; // GGUF weights file for this preset
+            params.port = 8012; // NOTE(review): presumably the port the llama.vim client expects - confirm
+            params.n_gpu_layers = 99; // NOTE(review): presumably high enough to offload every layer - confirm
+            params.flash_attn = true;
+            params.n_ubatch = 1024; // kept equal to n_batch on the next line
+            params.n_batch = 1024;
+            params.n_ctx = 0; // NOTE(review): 0 presumably defers the context size to the model - confirm
+            params.n_cache_reuse = 256; // NOTE(review): presumably the minimum chunk size for cache reuse - confirm
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER})); // registered for the server example only
+
+    add_opt(common_arg( // preset flag: Qwen 2.5 Coder 3B (Q8_0) configured for FIM use, per the flag name
+        {"--fim-qwen-3b-default"},
+        string_format("use default Qwen 2.5 Coder 3B (note: can download weights from the internet)"),
+        [](common_params & params) { // handler takes no value; it assigns every field below unconditionally
+            params.hf_repo = "ggml-org/Qwen2.5-Coder-3B-Q8_0-GGUF"; // model repo; help text warns the weights may be downloaded
+            params.hf_file = "qwen2.5-coder-3b-q8_0.gguf"; // GGUF weights file for this preset
+            params.port = 8012; // NOTE(review): presumably the port the llama.vim client expects - confirm
+            params.n_gpu_layers = 99; // NOTE(review): presumably high enough to offload every layer - confirm
+            params.flash_attn = true;
+            params.n_ubatch = 1024; // kept equal to n_batch on the next line
+            params.n_batch = 1024;
+            params.n_ctx = 0; // NOTE(review): 0 presumably defers the context size to the model - confirm
+            params.n_cache_reuse = 256; // NOTE(review): presumably the minimum chunk size for cache reuse - confirm
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER})); // registered for the server example only
+
+    add_opt(common_arg( // preset flag: Qwen 2.5 Coder 7B (Q8_0) configured for FIM use, per the flag name
+        {"--fim-qwen-7b-default"},
+        string_format("use default Qwen 2.5 Coder 7B (note: can download weights from the internet)"),
+        [](common_params & params) { // handler takes no value; it assigns every field below unconditionally
+            params.hf_repo = "ggml-org/Qwen2.5-Coder-7B-Q8_0-GGUF"; // model repo; help text warns the weights may be downloaded
+            params.hf_file = "qwen2.5-coder-7b-q8_0.gguf"; // GGUF weights file for this preset
+            params.port = 8012; // NOTE(review): presumably the port the llama.vim client expects - confirm
+            params.n_gpu_layers = 99; // NOTE(review): presumably high enough to offload every layer - confirm
+            params.flash_attn = true;
+            params.n_ubatch = 1024; // kept equal to n_batch on the next line
+            params.n_batch = 1024;
+            params.n_ctx = 0; // NOTE(review): 0 presumably defers the context size to the model - confirm
+            params.n_cache_reuse = 256; // NOTE(review): presumably the minimum chunk size for cache reuse - confirm
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER})); // registered for the server example only
+
      return ctx_arg;
  }
author	Daniel Bevenius <redacted>
	Wed, 19 Feb 2025 11:29:52 +0000 (12:29 +0100)
committer	GitHub <redacted>
	Wed, 19 Feb 2025 11:29:52 +0000 (12:29 +0100)