git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
fix: tool call parsing for LFM2 and LFM2.5 models (#21242)
authorJonathan <redacted>
Wed, 1 Apr 2026 14:22:44 +0000 (07:22 -0700)
committerGitHub <redacted>
Wed, 1 Apr 2026 14:22:44 +0000 (16:22 +0200)
* fix: tool call parsing for LFM2 and LFM2.5 models

* refactor: add test / break out lfm2 and lfm2.5 parsing logic

common/chat.cpp
models/templates/LFM2.5-Instruct.jinja [new file with mode: 0644]
tests/test-chat.cpp

index c2ca17c74307e53fc50f80e40687aba87ad3878f..7536c0cd015ac41960879e9c87f1bf58a98faa91 100644 (file)
@@ -1274,11 +1274,12 @@ static common_chat_params common_chat_params_init_kimi_k2(const common_chat_temp
     return data;
 }
 
-// LFM2 format:
-// - Reasoning: <think>{reasoning}</think> (optional, only if enable_thinking is true)
-// - Content: text after reasoning (optional)
-// - Tool calls: <|tool_call_start|>[function_name(arg1="value1", arg2="value2")]<|tool_call_end|>
-// Tool calls can appear multiple times (parallel tool calls)
+// LFM2 format: uses <|tool_list_start|>[...]<|tool_list_end|> in system prompt
+// and <|tool_call_start|>[name(arg="val")]<|tool_call_end|> for tool calls.
+// - Reasoning: <think>{reasoning}</think> (optional)
+// - Content: text before a tool call (optional)
+// - Tool calls: Python-style, e.g. [function_name(arg1="value1", arg2="value2")]
+//   Tool calls can appear multiple times (parallel tool calls supported)
 static common_chat_params common_chat_params_init_lfm2(const common_chat_template &    tmpl,
                                                        const autoparser::generation_params & inputs) {
     common_chat_params data;
@@ -1319,9 +1320,9 @@ static common_chat_params common_chat_params_init_lfm2(const common_chat_templat
         if (!has_tools || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
             return generation_prompt + reasoning + p.content(p.rest()) + end;
         }
-
         auto tool_calls = p.rule("tool-calls",
-            p.trigger_rule("tool-call", p.literal(TOOL_CALL_START) +
+            p.trigger_rule("tool-call",
+                p.literal(TOOL_CALL_START) +
                 p.python_style_tool_calls(inputs.tools, inputs.parallel_tool_calls) +
                 p.literal(TOOL_CALL_END)
             )
@@ -1349,6 +1350,80 @@ static common_chat_params common_chat_params_init_lfm2(const common_chat_templat
             { COMMON_GRAMMAR_TRIGGER_TYPE_WORD, TOOL_CALL_START }
         };
     }
+    return data;
+}
+
+// LFM2.5 format: uses plain "List of tools: [...]" in system prompt, no wrapper tokens.
+// Tool calls are bare [name(arg="val")], though model may optionally emit <|tool_call_start|>.
+// - Reasoning: <think>{reasoning}</think> (optional)
+// - Content: text before a tool call (optional)
+// - Tool calls: Python-style, e.g. [function_name(arg1="value1", arg2="value2")]
+//   Tool calls can appear multiple times (parallel tool calls supported)
+static common_chat_params common_chat_params_init_lfm2_5(const common_chat_template &    tmpl,
+                                                         const autoparser::generation_params & inputs) {
+    common_chat_params data;
+
+    data.prompt            = common_chat_template_direct_apply(tmpl, inputs);
+    data.format            = COMMON_CHAT_FORMAT_PEG_NATIVE;
+    data.supports_thinking = true;
+    data.preserved_tokens  = {
+        "<|tool_call_start|>",
+        "<|tool_call_end|>",
+        "<think>",
+        "</think>",
+    };
+
+    auto has_tools         = inputs.tools.is_array() && !inputs.tools.empty();
+    auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
+    auto include_grammar   = has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE;
+
+    const std::string THINK_START     = "<think>";
+    const std::string THINK_END       = "</think>";
+
+    data.thinking_start_tag = THINK_START;
+    data.thinking_end_tag   = THINK_END;
+
+    auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
+        auto generation_prompt = p.prefix(inputs.generation_prompt, THINK_START);
+        auto end = p.end();
+
+        auto reasoning = p.eps();
+        if (extract_reasoning && inputs.enable_thinking) {
+            reasoning = p.optional(THINK_START + p.reasoning(p.until(THINK_END)) + THINK_END);
+        }
+
+        if (!has_tools || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
+            return generation_prompt + reasoning + p.content(p.rest()) + end;
+        }
+
+        auto tool_calls = p.rule("tool-calls",
+            p.trigger_rule("tool-call",
+                p.python_style_tool_calls(inputs.tools, inputs.parallel_tool_calls)
+            )
+        );
+
+        auto content = p.content(p.until_one_of({"<|tool_call_start|>", "["}));
+        auto maybe_start = p.optional(p.literal("<|tool_call_start|>"));
+        return generation_prompt + reasoning + content + maybe_start + tool_calls + end;
+    });
+
+    data.parser = parser.save();
+
+    if (include_grammar) {
+        data.grammar_lazy = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO;
+        data.grammar      = build_grammar([&](const common_grammar_builder & builder) {
+            foreach_function(inputs.tools, [&](const json & tool) {
+                const auto & function = tool.at("function");
+                auto         schema   = function.at("parameters");
+                builder.resolve_refs(schema);
+            });
+            parser.build_grammar(builder, data.grammar_lazy);
+        });
+        foreach_function(inputs.tools, [&](const json & tool) {
+            const std::string name = tool.at("function").at("name");
+            data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "[" + name + "(" });
+        });
+    }
 
     return data;
 }
@@ -1530,14 +1605,21 @@ static std::optional<common_chat_params> try_specialized_template(
         return common_chat_params_init_kimi_k2(tmpl, params);
     }
 
-    // LFM2 - uses <|tool_list_start|>/<|tool_list_end|> markers and <|tool_call_start|>[name(args)]<|tool_call_end|> format
-    // Detection: template has "<|tool_list_start|>" and "<|tool_list_end|>" markers
+    // LFM2 format detection: template uses <|tool_list_start|>[...]<|tool_list_end|> around the tool list
+    // and <|tool_call_start|>[...]<|tool_call_end|> around each tool call
     if (src.find("<|tool_list_start|>") != std::string::npos &&
         src.find("<|tool_list_end|>") != std::string::npos) {
         LOG_DBG("Using specialized template: LFM2\n");
         return common_chat_params_init_lfm2(tmpl, params);
     }
 
+    // LFM2.5 format detection: template uses plain "List of tools: [...]" with no special tokens
+    if (src.find("List of tools: [") != std::string::npos &&
+        src.find("<|tool_list_start|>") == std::string::npos) {
+        LOG_DBG("Using specialized template: LFM2.5\n");
+        return common_chat_params_init_lfm2_5(tmpl, params);
+    }
+
     // GigaChatV3 format detection
     if (src.find("<|role_sep|>") != std::string::npos &&
         src.find("<|message_sep|>") != std::string::npos &&
diff --git a/models/templates/LFM2.5-Instruct.jinja b/models/templates/LFM2.5-Instruct.jinja
new file mode 100644 (file)
index 0000000..7778756
--- /dev/null
@@ -0,0 +1,45 @@
+{{- bos_token -}}
+{%- set keep_past_thinking = keep_past_thinking | default(false) -%}
+{%- set ns = namespace(system_prompt="") -%}
+{%- if messages[0]["role"] == "system" -%}
+    {%- set ns.system_prompt = messages[0]["content"] -%}
+    {%- set messages = messages[1:] -%}
+{%- endif -%}
+{%- if tools -%}
+    {%- set ns.system_prompt = ns.system_prompt + ("\n" if ns.system_prompt else "") + "List of tools: [" -%}
+    {%- for tool in tools -%}
+        {%- if tool is not string -%}
+            {%- set tool = tool | tojson -%}
+        {%- endif -%}
+        {%- set ns.system_prompt = ns.system_prompt + tool -%}
+        {%- if not loop.last -%}
+            {%- set ns.system_prompt = ns.system_prompt + ", " -%}
+        {%- endif -%}
+    {%- endfor -%}
+    {%- set ns.system_prompt = ns.system_prompt + "]" -%}
+{%- endif -%}
+{%- if ns.system_prompt -%}
+    {{- "<|im_start|>system\n" + ns.system_prompt + "<|im_end|>\n" -}}
+{%- endif -%}
+{%- set ns.last_assistant_index = -1 -%}
+{%- for message in messages -%}
+    {%- if message["role"] == "assistant" -%}
+        {%- set ns.last_assistant_index = loop.index0 -%}
+    {%- endif -%}
+{%- endfor -%}
+{%- for message in messages -%}
+    {{- "<|im_start|>" + message["role"] + "\n" -}}
+    {%- set content = message["content"] -%}
+    {%- if content is not string -%}
+        {%- set content = content | tojson -%}
+    {%- endif -%}
+    {%- if message["role"] == "assistant" and not keep_past_thinking and loop.index0 != ns.last_assistant_index -%}
+        {%- if "</think>" in content -%}
+            {%- set content = content.split("</think>")[-1] | trim -%}
+        {%- endif -%}
+    {%- endif -%}
+    {{- content + "<|im_end|>\n" -}}
+{%- endfor -%}
+{%- if add_generation_prompt -%}
+    {{- "<|im_start|>assistant\n" -}}
+{%- endif -%}
\ No newline at end of file
index 1c4da68195547671daf51358e70ae8af6fabf513..b66916687bfb95f7ea032bc4f6e521c8ce78e71d 100644 (file)
@@ -2712,6 +2712,67 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
             .run();
     }
 
+    // LFM2.5 tests - uses plain "List of tools: [...]" and bare [name(args)] without wrapper tokens
+    {
+        auto tst = peg_tester("models/templates/LFM2.5-Instruct.jinja", detailed_debug);
+
+        // Basic content only
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+
+        // Single tool call without reasoning
+        tst.test("[special_function(arg1=1)]")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+
+        // Tool call with string argument
+        tst.test("[get_time(city=\"XYZCITY\")]")
+            .tools({ get_time_tool })
+            .expect(message_with_tool_calls("get_time", "{\"city\":\"XYZCITY\"}"))
+            .run();
+
+        // Tool call with reasoning (enable_thinking=true)
+        tst.test("<think>I'm\nthinking</think>[special_function(arg1=1)]")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({ special_function_tool })
+            .expect(message_assist_call_thoughts)
+            .run();
+
+        // Multiple tool calls (parallel)
+        tst.test("[special_function(arg1=1), special_function_with_opt(arg1=1, arg2=2)]")
+            .parallel_tool_calls(true)
+            .tools({
+                special_function_tool, special_function_tool_with_optional_param
+            })
+            .expect_tool_calls({
+                { "special_function", R"({"arg1": 1})", {} },
+                { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} },
+            })
+            .run();
+
+        // Tool call with content before tool call
+        tst.test("Let me check the time.[get_time(city=\"Paris\")]")
+            .tools({ get_time_tool })
+            .expect(message_with_reasoning_content_and_multiple_tool_calls(
+                "", "Let me check the time.", { { "get_time", "{\"city\":\"Paris\"}" } }
+            ))
+            .run();
+
+        // Partial tool call (streaming)
+        tst.test("[special_function(arg1=")
+            .tools({ special_function_tool })
+            .is_partial(true)
+            .expect(simple_assist_msg("", "", "special_function", "{\"arg1\": "))
+            .run();
+
+        // Tool call with empty arguments
+        tst.test("[empty_args()]")
+            .tools({ empty_args_tool })
+            .expect(simple_assist_msg("", "", "empty_args", "{}"))
+            .run();
+    }
+
     // Apertus-8B-Instruct tests - FUNC_NAME_AS_KEY format
     // Format: <|tools_prefix|>[{"function_name": {...arguments...}}]<|tools_suffix|>
     {