server : allow specifying reasoning_format in HTTP request (#15238)
author Xuan-Son Nguyen <redacted>
Mon, 11 Aug 2025 12:48:41 +0000 (14:48 +0200)
committer GitHub <redacted>
Mon, 11 Aug 2025 12:48:41 +0000 (14:48 +0200)
common/arg.cpp
common/chat.cpp
common/chat.h
tools/server/README.md
tools/server/public/index.html.gz
tools/server/server.cpp
tools/server/webui/src/utils/app.context.tsx

index 0f01bb31454a4e19cc59ccaf709423ec318b4f2f..3d18aaa171ce41237455c1e42524d062cc11fcb2 100644 (file)
@@ -2949,11 +2949,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         "- deepseek: puts thoughts in `message.reasoning_content` (except in streaming mode, which behaves as `none`)\n"
         "(default: auto)",
         [](common_params & params, const std::string & value) {
-            /**/ if (value == "deepseek") { params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK; }
-            else if (value == "deepseek-legacy") { params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY; }
-            else if (value == "none") {     params.reasoning_format = COMMON_REASONING_FORMAT_NONE; }
-            else if (value == "auto") {     params.reasoning_format = COMMON_REASONING_FORMAT_AUTO; }
-            else { throw std::invalid_argument("invalid value"); }
+            params.reasoning_format = common_reasoning_format_from_name(value);
         }
     ).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MAIN}).set_env("LLAMA_ARG_THINK"));
     add_opt(common_arg(
index 316bd24170c9e290dfaa98130d54f451ab2f8773..92fbbbe111f016a822858928b3006f6a51ef8083 100644 (file)
@@ -625,6 +625,19 @@ const char * common_reasoning_format_name(common_reasoning_format format) {
     }
 }
 
+common_reasoning_format common_reasoning_format_from_name(const std::string & format) {
+    if (format == "none") {
+        return COMMON_REASONING_FORMAT_NONE;
+    } else if (format == "auto") {
+        return COMMON_REASONING_FORMAT_AUTO;
+    } else if (format == "deepseek") {
+        return COMMON_REASONING_FORMAT_DEEPSEEK;
+    } else if (format == "deepseek-legacy") {
+        return COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY;
+    }
+    throw std::runtime_error("Unknown reasoning format: " + format);
+}
+
 static std::string wrap_code_as_arguments(common_chat_msg_parser & builder, const std::string & code) {
     std::string arguments;
     if (builder.is_partial()) {
index eb628d8bc275dd3ce50b41126301687430fc9d3f..c4d6b2e85ea2a118d3353923925ee5f71a6781cc 100644 (file)
@@ -191,6 +191,7 @@ std::string common_chat_format_example(
 
 const char*               common_chat_format_name(common_chat_format format);
 const char*               common_reasoning_format_name(common_reasoning_format format);
+common_reasoning_format   common_reasoning_format_from_name(const std::string & format);
 common_chat_msg           common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax);
 
 common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice);
index 87cef75730afbf55a26bb122384d6bfda6eb40c8..af9264ddd38e4ca4192c014a1c7f7d14f2e9bc90 100644 (file)
@@ -1132,6 +1132,12 @@ The `response_format` parameter supports both plain JSON output (e.g. `{"type":
 
 `chat_template_kwargs`: Allows sending additional parameters to the json templating system. For example: `{"enable_thinking": false}`
 
+`reasoning_format`: The reasoning format to parse from the model output. If set to `none`, the raw generated text is returned without parsing.
+
+`thinking_forced_open`: Force a reasoning model to always output its reasoning. Only works on certain models.
+
+`parse_tool_calls`: Whether to parse the generated tool calls.
+
 *Examples:*
 
 You can use either Python `openai` library with appropriate checkpoints:
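For reference, a minimal client-side sketch (TypeScript, using the global `fetch`) of how the per-request fields added above could be supplied; the host, endpoint path, and prompt are illustrative assumptions, not part of this commit:

```ts
// Hypothetical client sketch: sends the new per-request reasoning fields to a
// llama.cpp server assumed to be listening on http://localhost:8080.
async function chatWithRawReasoning(): Promise<void> {
  const response = await fetch('http://localhost:8080/v1/chat/completions', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      messages: [{ role: 'user', content: 'Why is the sky blue?' }],
      // Override the server-wide --reasoning-format setting for this request only
      reasoning_format: 'none',     // return the raw generated text, unparsed
      thinking_forced_open: false,  // do not force the model to always output reasoning
      parse_tool_calls: false,      // leave any generated tool-call text unparsed
    }),
  });

  const data = await response.json();
  console.log(data.choices[0].message);
}

chatWithRawReasoning().catch(console.error);
```

If `reasoning_format` is omitted from the request, the server falls back to the value configured via `--reasoning-format` (default: `auto`), as implemented in the `server.cpp` hunk below.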
index 4e25eef1760ca1b5e4a2ee89004c0387182073be..a472714efb74e89e79cc02d65f0349f279417a82 100644 (file)
Binary files a/tools/server/public/index.html.gz and b/tools/server/public/index.html.gz differ
index a255d481a4d1c794ca4ff2f1a4cf21ae7a648bf4..45f50e93769e1dfaf54550f960506213d78ad450 100644 (file)
@@ -383,8 +383,12 @@ struct server_task {
             } else {
                 params.oaicompat_chat_syntax.format = defaults.oaicompat_chat_syntax.format;
             }
-            params.oaicompat_chat_syntax.reasoning_format = params_base.reasoning_format;
-            params.oaicompat_chat_syntax.reasoning_in_content = params.stream && (params_base.reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY);
+            common_reasoning_format reasoning_format = params_base.reasoning_format;
+            if (data.contains("reasoning_format")) {
+                reasoning_format = common_reasoning_format_from_name(data.at("reasoning_format").get<std::string>());
+            }
+            params.oaicompat_chat_syntax.reasoning_format = reasoning_format;
+            params.oaicompat_chat_syntax.reasoning_in_content = params.stream && (reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY);
             params.oaicompat_chat_syntax.thinking_forced_open = json_value(data, "thinking_forced_open", false);
             params.oaicompat_chat_syntax.parse_tool_calls = json_value(data, "parse_tool_calls", false);
         }
index 96cffd95aba7c4b14ff1717ebb61a8540eb9fd83..0b95e88f893768a34f5776edbb5065099c2e1d96 100644 (file)
@@ -209,6 +209,7 @@ export const AppContextProvider = ({
         messages,
         stream: true,
         cache_prompt: true,
+        reasoning_format: 'none',
         samplers: config.samplers,
         temperature: config.temperature,
         dynatemp_range: config.dynatemp_range,