"- deepseek: puts thoughts in `message.reasoning_content` (except in streaming mode, which behaves as `none`)\n"
"(default: auto)",
[](common_params & params, const std::string & value) {
- /**/ if (value == "deepseek") { params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK; }
- else if (value == "deepseek-legacy") { params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY; }
- else if (value == "none") { params.reasoning_format = COMMON_REASONING_FORMAT_NONE; }
- else if (value == "auto") { params.reasoning_format = COMMON_REASONING_FORMAT_AUTO; }
- else { throw std::invalid_argument("invalid value"); }
+ params.reasoning_format = common_reasoning_format_from_name(value);
}
).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MAIN}).set_env("LLAMA_ARG_THINK"));
add_opt(common_arg(
}
}
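+// map a reasoning format name (as accepted by --reasoning-format) to its enum value; throws on unknown names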
+common_reasoning_format common_reasoning_format_from_name(const std::string & format) {
+ if (format == "none") {
+ return COMMON_REASONING_FORMAT_NONE;
+ } else if (format == "auto") {
+ return COMMON_REASONING_FORMAT_AUTO;
+ } else if (format == "deepseek") {
+ return COMMON_REASONING_FORMAT_DEEPSEEK;
+ } else if (format == "deepseek-legacy") {
+ return COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY;
+ }
+ throw std::runtime_error("Unknown reasoning format: " + format);
+}
+
static std::string wrap_code_as_arguments(common_chat_msg_parser & builder, const std::string & code) {
std::string arguments;
if (builder.is_partial()) {
const char* common_chat_format_name(common_chat_format format);
const char* common_reasoning_format_name(common_reasoning_format format);
+common_reasoning_format common_reasoning_format_from_name(const std::string & format);
common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax);
common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice);
`chat_template_kwargs`: Allows sending additional parameters to the json templating system. For example: `{"enable_thinking": false}`
+`reasoning_format`: The reasoning format to use when parsing the model output, overriding the server's `--reasoning-format` setting for this request. If set to `none`, the reasoning content is left inline in the raw generated text.
+
+`thinking_forced_open`: Forces a reasoning model to always output its reasoning. Only works with certain models.
+
+`parse_tool_calls`: Whether to parse tool calls from the generated output.
+
*Examples:*
You can use either the Python `openai` library with appropriate checkpoints:
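For example, here is a minimal sketch of overriding `reasoning_format` for a single request via the `openai` client's `extra_body` passthrough (the base URL assumes a local server on the default port, and the model name is an arbitrary placeholder):

```python
import openai

client = openai.OpenAI(
    base_url="http://localhost:8080/v1",  # assumed local llama-server endpoint
    api_key="sk-no-key-required",         # placeholder; only checked if --api-key is set
)

response = client.chat.completions.create(
    model="placeholder",  # llama-server serves the loaded model regardless of this field
    messages=[{"role": "user", "content": "Why is the sky blue?"}],
    # extra_body fields are merged into the request JSON, so this arrives
    # at the server as a top-level "reasoning_format" field
    extra_body={"reasoning_format": "none"},
)

# with reasoning_format=none, any reasoning stays inline in the raw text
print(response.choices[0].message.content)
```

The same field can also be sent as part of a raw JSON request body to the chat completions endpoint.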
} else {
params.oaicompat_chat_syntax.format = defaults.oaicompat_chat_syntax.format;
}
- params.oaicompat_chat_syntax.reasoning_format = params_base.reasoning_format;
- params.oaicompat_chat_syntax.reasoning_in_content = params.stream && (params_base.reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY);
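+ // a per-request "reasoning_format" field overrides the global --reasoning-format setting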
+ common_reasoning_format reasoning_format = params_base.reasoning_format;
+ if (data.contains("reasoning_format")) {
+ reasoning_format = common_reasoning_format_from_name(data.at("reasoning_format").get<std::string>());
+ }
+ params.oaicompat_chat_syntax.reasoning_format = reasoning_format;
+ params.oaicompat_chat_syntax.reasoning_in_content = params.stream && (reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY);
params.oaicompat_chat_syntax.thinking_forced_open = json_value(data, "thinking_forced_open", false);
params.oaicompat_chat_syntax.parse_tool_calls = json_value(data, "parse_tool_calls", false);
}
messages,
stream: true,
cache_prompt: true,
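+ // keep reasoning inline in the raw text (the webui extracts it client-side)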
+ reasoning_format: 'none',
samplers: config.samplers,
temperature: config.temperature,
dynatemp_range: config.dynatemp_range,