presets : add qwen3-30B-a3b FIM (#15616)

author Georgi Gerganov <redacted>

Wed, 27 Aug 2025 12:48:07 +0000 (15:48 +0300)

committer GitHub <redacted>

Wed, 27 Aug 2025 12:48:07 +0000 (15:48 +0300)
author Georgi Gerganov <redacted>
Wed, 27 Aug 2025 12:48:07 +0000 (15:48 +0300)
committer GitHub <redacted>
Wed, 27 Aug 2025 12:48:07 +0000 (15:48 +0300)
diff --git a/common/arg.cpp b/common/arg.cpp

index 1ae3fdbf4a80250375311956a233a180d80654cf..d82f55890dee5398f9fa063e53f01c53ac8a2540 100644 (file)
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -3538,6 +3538,22 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
          }
      ).set_examples({LLAMA_EXAMPLE_SERVER}));
  
+    add_opt(common_arg(
+        {"--fim-qwen-30b-default"},
+        string_format("use default Qwen 3 Coder 30B A3B Instruct (note: can download weights from the internet)"),
+        [](common_params & params) {
+            params.model.hf_repo = "ggml-org/Qwen3-Coder-30B-A3B-Instruct-Q8_0-GGUF";
+            params.model.hf_file = "qwen3-coder-30b-a3b-instruct-q8_0.gguf";
+            params.port = 8012;
+            params.n_gpu_layers = 99;
+            params.flash_attn = true;
+            params.n_ubatch = 1024;
+            params.n_batch = 1024;
+            params.n_ctx = 0;
+            params.n_cache_reuse = 256;
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER}));
+
      add_opt(common_arg(
          { "--diffusion-steps" }, "N",
          string_format("number of diffusion steps (default: %d)", params.diffusion.steps),
author	Georgi Gerganov <redacted>
	Wed, 27 Aug 2025 12:48:07 +0000 (15:48 +0300)
committer	GitHub <redacted>
	Wed, 27 Aug 2025 12:48:07 +0000 (15:48 +0300)