From: Piotr Wilkin (ilintar) Date: Thu, 4 Sep 2025 23:22:22 +0000 (+0200) Subject: chat : nemotron thinking & toolcalling support (#15676) X-Git-Tag: upstream/0.0.6527~141 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=b2426e469e2fdb6c44216d56baa4cfff4f39ae00;p=pkg%2Fggml%2Fsources%2Fllama.cpp chat : nemotron thinking & toolcalling support (#15676) * feat: nemotron thinking & toolcalling support * Trailing whitespaces * Corrected template for Nemotron * Template and parser fixes * Final template and grammar changes * Whitespace * Always do lazy grammar processing since tag will always be there. * Allow extra content after toolcall * Whitespace * New tests: thinking + tools, tools + content, thinking + tools + content (new!) * Whitespace * Remove cURL test script --- diff --git a/common/chat.cpp b/common/chat.cpp index 955c4285..823d88de 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -623,6 +623,7 @@ const char * common_chat_format_name(common_chat_format format) { case COMMON_CHAT_FORMAT_GRANITE: return "Granite"; case COMMON_CHAT_FORMAT_GPT_OSS: return "GPT-OSS"; case COMMON_CHAT_FORMAT_SEED_OSS: return "Seed-OSS"; + case COMMON_CHAT_FORMAT_NEMOTRON_V2: return "Nemotron V2"; default: throw std::runtime_error("Unknown chat format"); } @@ -1184,6 +1185,67 @@ static common_chat_params common_chat_params_init_llama_3_x(const common_chat_te }); return data; } + +static common_chat_params common_chat_params_init_nemotron_v2(const common_chat_template & tmpl, const struct templates_params & inputs) { + common_chat_params data; + + // Generate the prompt using the apply() function with the template + data.prompt = apply(tmpl, inputs); + data.format = COMMON_CHAT_FORMAT_NEMOTRON_V2; + + // Handle thinking tags appropriately based on inputs.enable_thinking + if (string_ends_with(data.prompt, "\n")) { + if (!inputs.enable_thinking) { + data.prompt += ""; + } else { + data.thinking_forced_open = true; + } + } + + // When tools are present, build grammar for the format, similar to CommandR, but without tool call ID + if (!inputs.tools.is_null() && inputs.tools.is_array() && !inputs.tools.empty()) { + data.grammar_lazy = true; + data.grammar = build_grammar([&](const common_grammar_builder & builder) { + auto schemas = json::array(); + foreach_function(inputs.tools, [&](const json & tool) { + const auto & function = tool.at("function"); + schemas.push_back({ + { "type", "object" }, + { "properties", + { + { "name", + { + { "type", "string" }, + { "const", function.at("name") }, + } }, + { "arguments", function.at("parameters") }, + } }, + { "required", json::array({ "name", "arguments" }) }, + }); + }); + auto schema = json{ + { "type", "array" }, + { "items", schemas.size() == 1 ? schemas[0] : json{ { "anyOf", schemas } } }, + { "minItems", 1 }, + }; + if (!inputs.parallel_tool_calls) { + schema["maxItems"] = 1; + } + builder.add_rule("root", + std::string(data.thinking_forced_open ? "( \"\" space )? " : "") + + "\"\" " + builder.add_schema("tool_calls", schema) + + " \"\""); + }); + data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL, + // If thinking_forced_open, then we capture the tag in the grammar, + // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar) + std::string(data.thinking_forced_open ? + "[\\s\\S]*?(\\s*)" : + "(?:[\\s\\S]*?\\s*)?") + + "()[\\s\\S]*" }); + } + return data; +} static void common_chat_parse_llama_3_1(common_chat_msg_parser & builder, bool with_builtin_tools = false) { if (!builder.syntax().parse_tool_calls) { builder.add_content(builder.consume_rest()); @@ -2060,6 +2122,33 @@ static void common_chat_parse_granite(common_chat_msg_parser & builder) { } } +static void common_chat_parse_nemotron_v2(common_chat_msg_parser & builder) { + // Parse thinking tags + builder.try_parse_reasoning("", ""); + if (!builder.syntax().parse_tool_calls) { + builder.add_content(builder.consume_rest()); + return; + } + + // Look for tool calls + static const common_regex tool_call_regex(regex_escape("")); + if (auto res = builder.try_find_regex(tool_call_regex)) { + builder.move_to(res->groups[0].end); + + // Expect JSON array of tool calls + auto tool_calls_data = builder.consume_json(); + if (tool_calls_data.json.is_array()) { + if (!builder.try_consume_literal("")) { + throw common_chat_msg_partial_exception("Incomplete tool call"); + } + builder.add_tool_calls(tool_calls_data.json); + } else { + throw common_chat_msg_partial_exception("Incomplete tool call"); + } + } + builder.add_content(builder.consume_rest()); +} + static void common_chat_parse_seed_oss(common_chat_msg_parser & builder) { // Parse thinking tags first - this handles the main reasoning content builder.try_parse_reasoning("", ""); @@ -2293,6 +2382,11 @@ static common_chat_params common_chat_templates_apply_jinja( return common_chat_params_init_seed_oss(tmpl, params, inputs); } + // Nemotron v2 + if (src.find("") != std::string::npos) { + return common_chat_params_init_nemotron_v2(tmpl, params); + } + // Use generic handler when mixing tools + JSON schema. // TODO: support that mix in handlers below. if ((params.tools.is_array() && params.json_schema.is_object())) { @@ -2454,6 +2548,9 @@ static void common_chat_parse(common_chat_msg_parser & builder) { case COMMON_CHAT_FORMAT_SEED_OSS: common_chat_parse_seed_oss(builder); break; + case COMMON_CHAT_FORMAT_NEMOTRON_V2: + common_chat_parse_nemotron_v2(builder); + break; default: throw std::runtime_error(std::string("Unsupported format: ") + common_chat_format_name(builder.syntax().format)); } diff --git a/common/chat.h b/common/chat.h index b09ff3b1..ccd26f27 100644 --- a/common/chat.h +++ b/common/chat.h @@ -112,6 +112,7 @@ enum common_chat_format { COMMON_CHAT_FORMAT_GRANITE, COMMON_CHAT_FORMAT_GPT_OSS, COMMON_CHAT_FORMAT_SEED_OSS, + COMMON_CHAT_FORMAT_NEMOTRON_V2, COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats }; diff --git a/models/templates/NVIDIA-Nemotron-Nano-v2.jinja b/models/templates/NVIDIA-Nemotron-Nano-v2.jinja new file mode 100644 index 00000000..c8ab5848 --- /dev/null +++ b/models/templates/NVIDIA-Nemotron-Nano-v2.jinja @@ -0,0 +1,162 @@ +{%- set ns = namespace(enable_thinking=true) -%} +{%- for message in messages -%} + {%- set content = message['content'] -%} + {%- if message['role'] == 'user' or message['role'] == 'system' -%} + {%- if '/think' in content -%} + {%- set ns.enable_thinking = true -%} + {%- elif '/no_think' in content -%} + {%- set ns.enable_thinking = false -%} + {%- endif -%} + {%- endif -%} +{%- endfor -%} + +{%- if messages[0]['role'] != 'system' -%} + {%- set ns.non_tool_system_content = '' -%} + {{- 'System +' -}} +{%- else -%} + {%- set ns.non_tool_system_content = (messages[0]['content'] | default('', true)).replace('/think', '').replace('/no_think', '').strip() -%} + {{- 'System +' + ns.non_tool_system_content }} +{%- endif -%} + +{%- if tools -%} + {%- if ns.non_tool_system_content is defined and ns.non_tool_system_content != '' -%} + {{- ' + +' -}} + {%- endif -%} + {{- 'You can use the following tools to assist the user if required:' -}} + {{- ' +[' -}} + {%- for tool in tools -%} + {{- (tool.function if tool.function is defined else tool) | tojson -}} + {{- ', ' if not loop.last else '' -}} + {%- endfor -%} + {{- '] + +' -}} + {{- 'If you decide to call any tool(s), use the following format: +' -}} + {{- '[{{"name": "tool_name1", "arguments": "tool_args1"}}, ' -}} + {{- '{{"name": "tool_name2", "arguments": "tool_args2"}}] + +' -}} + {{- 'The user will execute tool-calls and return responses from tool(s) in this format: +' -}} + {{- '[{{"tool_response1"}}, {{"tool_response2"}}] + +' -}} + {{- 'Based on the tool responses, you can call additional tools if needed, correct tool calls if any errors are found, or just respond to the user.' -}} +{%- endif -%} +{{- ' + +' -}} +{%- set messages = messages[1:] if messages[0]['role'] == 'system' else messages -%} +{%- if messages[-1]['role'] == 'assistant' -%} + {%- set ns.last_turn_assistant_content = (messages[-1]['content'] | default('', true)).strip() -%} + {%- set ns.last_turn_assistant_tool_calls = messages[-1]['tool_calls'] if 'tool_calls' in messages[-1] else [] -%} + {%- set messages = messages[:-1] -%} +{%- endif -%} + +{%- for message in messages %} + {%- set content = message['content'] %} + {%- if message['role'] == 'user' -%} + {{- 'User +' + (content | default('', true)).replace('/think', '').replace('/no_think', '').strip() + ' +' }} + {%- elif message['role'] == 'tool' -%} + {%- if loop.first or (messages[loop.index0 - 1].role != 'tool') -%} + {{- 'User +' + '[' }} + {%- endif -%} + {{- message['content'] -}} + {{- ', ' if not loop.last and (messages[loop.index0 + 1].role == 'tool') else '' -}} + {%- if loop.last or (messages[loop.index0 + 1].role != 'tool') -%} + {{- ']' -}} + {%- endif -%} + {%- elif message['role'] == 'assistant' -%} + {%- if content and '' in content -%} + {%- set content = (content.split('')[1] | default('', true)).strip() %} + {%- endif -%} + {{- 'Assistant +' + ((content | default('', true)).strip() if content is not none else '') }} + {%- if message.tool_calls -%} + {%- if (content | default('', true)).strip() != '' -%} + {{- ' +' -}} + {%- endif -%} + {{- '[' -}} + {%- for call in message.tool_calls -%} + {%- set fn = call.function if call.function is defined else call -%} + {{- '{"name": "' + fn.name + '", "arguments": ' -}} + {%- if fn.arguments is string -%} + {{- fn.arguments -}} + {%- else -%} + {{- fn.arguments | tojson -}} + {%- endif -%} + {{- '}' + (', ' if not loop.last else '') -}} + {%- endfor -%} + {{- ']' -}} + {%- endif -%} + {{- ' + +' -}} + {%- endif -%} +{%- endfor -%} + +{%- if add_generation_prompt -%} + {{- 'Assistant +' -}} + {%- if ns.enable_thinking is defined and ns.enable_thinking is false -%} + {{- '' -}} + {%- else -%} + {{- ' +' -}} + {%- endif -%} + {%- if ns.last_turn_assistant_content is defined and ns.last_turn_assistant_content != '' -%} + {{- ns.last_turn_assistant_content -}} + {%- endif -%} +{%- else -%} + {%- if ns.last_turn_assistant_content is defined and ns.last_turn_assistant_content != '' -%} + {{- 'Assistant +' -}} + {%- if ns.enable_thinking is defined and ns.enable_thinking is false -%} + {{- '' -}} + {%- else -%} + {{- ' +' -}} + {%- endif -%} + {{- ns.last_turn_assistant_content -}} + {%- if continue_final_message is defined -%} + {%- if continue_final_message is false -%} + {{- ' + +' -}} + {%- endif -%} + {%- else -%} + {{- ' + +' -}} + {%- endif -%} + {%- endif -%} + {%- if ns.last_turn_assistant_tool_calls is defined and ns.last_turn_assistant_tool_calls | length > 0 -%} + {{- 'Assistant +' -}} + {{- '[' -}} + {%- for call in ns.last_turn_assistant_tool_calls -%} + {%- set fn = call.function if call.function is defined else call -%} + {{- '{"name": "' + fn.name + '", "arguments": ' -}} + {%- if fn.arguments is string -%} + {{- fn.arguments -}} + {%- else -%} + {{- fn.arguments | tojson -}} + {%- endif -%} + {{- '}' + (', ' if not loop.last else '') -}} + {%- endfor -%} + {{- ']' -}} + {{- ' + +' -}} + {%- endif -%} +{%- endif -%} \ No newline at end of file diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp index 8120b45c..17ff7ea9 100644 --- a/tests/test-chat.cpp +++ b/tests/test-chat.cpp @@ -420,6 +420,7 @@ const common_chat_msg message_assist_call_empty_args = simple_assist const common_chat_msg message_assist_call_cutoff_args = simple_assist_msg("", "", "special_function", "{\"arg"); const common_chat_msg message_assist_call_thoughts = simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\":1}"); const common_chat_msg message_assist_call_thoughts_unparsed = simple_assist_msg("I'm\nthinking\n\n", "", "special_function", "{\"arg1\": 1}"); +const common_chat_msg message_assist_call_thoughts_content = simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking", "special_function", "{\"arg1\": 1}"); const common_chat_msg message_assist_call_id = simple_assist_msg("", "", "special_function", "{\"arg1\":1}", /* .id = */ "123456789"); const common_chat_msg message_assist_call_idx = simple_assist_msg("", "", "special_function", "{\"arg1\":1}", /* .id = */ "0"); const common_chat_msg message_assist_thoughts_call_idx = simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\": 1}", /* id = */ "0"); @@ -436,6 +437,7 @@ static void test_msgs_oaicompat_json_conversion() { message_assist_call, message_assist_call_thoughts, message_assist_call_thoughts_unparsed, + message_assist_call_thoughts_content, message_assist_call_id, message_assist_call_idx, message_assist_call_python, @@ -1755,6 +1757,77 @@ static void test_template_output_parsers() { /* is_partial= */ false, {COMMON_CHAT_FORMAT_SEED_OSS})); } + + { + auto tmpls = read_templates("models/templates/NVIDIA-Nemotron-Nano-v2.jinja"); + std::vector end_tokens{ "" }; + + assert_equals(COMMON_CHAT_FORMAT_NEMOTRON_V2, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format); + assert_equals(COMMON_CHAT_FORMAT_NEMOTRON_V2, common_chat_templates_apply(tmpls.get(), inputs_tools).format); + + // Test parsing regular content + assert_msg_equals(message_assist, + common_chat_parse( + "Hello, world!\nWhat's up?", + /* is_partial= */ false, + {COMMON_CHAT_FORMAT_NEMOTRON_V2})); + + // Test parsing content with thinking + assert_msg_equals(message_assist_thoughts, + common_chat_parse( + "I'm\nthinkingHello, world!\nWhat's up?", + /* is_partial= */ false, + { + /* .format = */ COMMON_CHAT_FORMAT_NEMOTRON_V2, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + })); + + // Test parsing tool calls + assert_msg_equals(message_assist_call, + common_chat_parse( + "[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]", + /* is_partial= */ false, + {COMMON_CHAT_FORMAT_NEMOTRON_V2})); + + // Test parsing tool calls with thinking + assert_msg_equals(message_assist_call_thoughts, + common_chat_parse( + "I'm\nthinking[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]", + /* is_partial= */ false, + { + /* .format = */ COMMON_CHAT_FORMAT_NEMOTRON_V2, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK + })); + + // Test tool calls with extra content + assert_msg_equals(message_assist_call_content, + common_chat_parse( + "[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]Hello, world!\nWhat's up?", + /* is_partial= */ false, + {COMMON_CHAT_FORMAT_NEMOTRON_V2} + )); + + // Test tool calls with extra content AND thinking + assert_msg_equals(message_assist_call_thoughts_content, + common_chat_parse( + "I'm\nthinking[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]Hello, world!\nWhat's up?", + /* is_partial= */ false, + { + /* .format = */ COMMON_CHAT_FORMAT_NEMOTRON_V2, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK + })); + + // Test template generation for regular content + test_templates(tmpls.get(), end_tokens, message_assist, tools, + "Hello, world!\nWhat's up?\n", + /* expect_grammar_triggered= */ false); + + // Test template generation for tool calls + test_templates(tmpls.get(), end_tokens, message_assist_call, tools, + "[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]", + /* expect_grammar_triggered= */ true + ); + } } static void test_msg_diffs_compute() {