llama: -fa 1/0/-1 aliases for -fa on/off/auto (#15746)

author Johannes Gäßler <redacted>

Tue, 2 Sep 2025 16:17:26 +0000 (18:17 +0200)

committer GitHub <redacted>

Tue, 2 Sep 2025 16:17:26 +0000 (18:17 +0200)
author Johannes Gäßler <redacted>
Tue, 2 Sep 2025 16:17:26 +0000 (18:17 +0200)
committer GitHub <redacted>
Tue, 2 Sep 2025 16:17:26 +0000 (18:17 +0200)
diff --git a/common/arg.cpp b/common/arg.cpp

index 4fa214d3d28569cf9d74e6dc00b38ec4bb671e86..fcee0c447007765494ea991f5d080a8ba3debc20 100644 (file)
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -1548,11 +1548,11 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
          {"-fa", "--flash-attn"}, "FA",
          string_format("set Flash Attention use ('on', 'off', or 'auto', default: '%s')", llama_flash_attn_type_name(params.flash_attn_type)),
          [](common_params & params, const std::string & value) {
-            if (value == "on" || value == "enabled") {
+            if (value == "on" || value == "enabled" || value == "1") {
                  params.flash_attn_type = LLAMA_FLASH_ATTN_TYPE_ENABLED;
-            } else if (value == "off" || value == "disabled") {
+            } else if (value == "off" || value == "disabled" || value == "0") {
                  params.flash_attn_type = LLAMA_FLASH_ATTN_TYPE_DISABLED;
-            } else if (value == "auto") {
+            } else if (value == "auto" || value == "-1") {
                  params.flash_attn_type = LLAMA_FLASH_ATTN_TYPE_AUTO;
              } else {
                  throw std::runtime_error(string_format("error: unkown value for --flash-attn: '%s'\n", value.c_str()));
author	Johannes Gäßler <redacted>
	Tue, 2 Sep 2025 16:17:26 +0000 (18:17 +0200)
committer	GitHub <redacted>
	Tue, 2 Sep 2025 16:17:26 +0000 (18:17 +0200)