case COMMON_REASONING_FORMAT_AUTO: return "auto";
case COMMON_REASONING_FORMAT_DEEPSEEK: return "deepseek";
case COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY: return "deepseek-legacy";
- case COMMON_REASONING_FORMAT_GRANITE: return "granite";
default:
throw std::runtime_error("Unknown reasoning format");
}
bool add_gumbel_noise = false; // add gumbel noise to the logits if temp > 0.0
};
+// reasoning API response format (not to be confused as chat template's reasoning format)
enum common_reasoning_format {
COMMON_REASONING_FORMAT_NONE,
- COMMON_REASONING_FORMAT_AUTO,
+ COMMON_REASONING_FORMAT_AUTO, // Same as deepseek, using `message.reasoning_content`
COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY, // Extract thinking tag contents and return as `message.reasoning_content`, or leave inline in <think> tags in stream mode
COMMON_REASONING_FORMAT_DEEPSEEK, // Extract thinking tag contents and return as `message.reasoning_content`, including in streaming deltas.
- COMMON_REASONING_FORMAT_GRANITE, // Extract thinking tag contents and return as `message.reasoning_content`, including in streaming deltas.
+ // do not extend this enum unless you absolutely have to
+ // in most cases, use COMMON_REASONING_FORMAT_AUTO
+ // see: https://github.com/ggml-org/llama.cpp/pull/15408
};
/* is_partial= */ false,
{
/* .format = */ COMMON_CHAT_FORMAT_GRANITE,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_GRANITE,
+ /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
}));
// Test parsing tool calls