git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
Send reasoning content back to the model across turns via the reasoning_content API...
author: Pascal <redacted>
Fri, 27 Mar 2026 07:17:35 +0000 (08:17 +0100)
committer: GitHub <redacted>
Fri, 27 Mar 2026 07:17:35 +0000 (08:17 +0100)
* webui: send reasoning_content back to model in context

Preserve assistant reasoning across turns by extracting it from
internal tags and sending it as a separate reasoning_content field
in the API payload. The server and Jinja templates handle native
formatting (e.g. <think> tags for Qwen, GLM, DeepSeek...).

Adds "Exclude reasoning from context" toggle in Settings > Developer
(off by default, so reasoning is preserved). Includes unit tests.

* webui: add syncable parameter for excludeReasoningFromContext

* chore: update webui build output

tools/server/public/index.html.gz
tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettings.svelte
tools/server/webui/src/lib/constants/agentic.ts
tools/server/webui/src/lib/constants/settings-config.ts
tools/server/webui/src/lib/constants/settings-keys.ts
tools/server/webui/src/lib/services/chat.service.ts
tools/server/webui/src/lib/services/parameter-sync.service.ts
tools/server/webui/src/lib/stores/chat.svelte.ts
tools/server/webui/src/lib/types/api.d.ts
tools/server/webui/src/lib/types/settings.d.ts
tools/server/webui/tests/unit/reasoning-context.test.ts [new file with mode: 0644]

index 14d115fa58dc982caa68c2ac53f3d88a2b7ec458..adc7939d3b48eb69c0e44a2dd91782f40d51a6d9 100644 (file)
Binary files a/tools/server/public/index.html.gz and b/tools/server/public/index.html.gz differ
index 44d59e2b3602fee2a4e1eafb7fddf067c0aec650..995dd1fdda4d903550164bd477dbf2ad05572125 100644 (file)
                                        label: 'Disable reasoning content parsing',
                                        type: SettingsFieldType.CHECKBOX
                                },
+                               {
+                                       key: SETTINGS_KEYS.EXCLUDE_REASONING_FROM_CONTEXT,
+                                       label: 'Exclude reasoning from context',
+                                       type: SettingsFieldType.CHECKBOX
+                               },
                                {
                                        key: SETTINGS_KEYS.SHOW_RAW_OUTPUT_SWITCH,
                                        label: 'Enable raw output toggle',
index 7ff9e4e52134a4fee7234ea1e8a501c5035a3684..ac31d5126de15d059bbaee29dbb42bbf9cc27eb4 100644 (file)
@@ -50,6 +50,8 @@ export const AGENTIC_REGEX = {
        PARTIAL_MARKER: /<<<[A-Za-z_]*$/,
        // Matches reasoning content blocks (including tags)
        REASONING_BLOCK: /<<<reasoning_content_start>>>[\s\S]*?<<<reasoning_content_end>>>/g,
+       // Captures the reasoning text between start/end tags
+       REASONING_EXTRACT: /<<<reasoning_content_start>>>([\s\S]*?)<<<reasoning_content_end>>>/,
        // Matches an opening reasoning tag and any remaining content (unterminated)
        REASONING_OPEN: /<<<reasoning_content_start>>>[\s\S]*$/,
        // Matches a complete agentic tool call display block (start to end marker)
index ae9dd3ce8fd6fedd3e616a631b717b75900a7f5d..0b05984df99f1ab95198d09a2764bf80e26a5c10 100644 (file)
@@ -10,6 +10,7 @@ export const SETTING_CONFIG_DEFAULT: Record<string, string | number | boolean |
        theme: ColorMode.SYSTEM,
        showThoughtInProgress: false,
        disableReasoningParsing: false,
+       excludeReasoningFromContext: false,
        showRawOutputSwitch: false,
        keepStatsVisible: false,
        showMessageStats: true,
@@ -106,6 +107,8 @@ export const SETTING_CONFIG_INFO: Record<string, string> = {
        showThoughtInProgress: 'Expand thought process by default when generating messages.',
        disableReasoningParsing:
                'Send reasoning_format=none to prevent server-side extraction of reasoning tokens into separate field',
+       excludeReasoningFromContext:
+               'Strip reasoning content from previous messages before sending to the model. When unchecked, reasoning is sent back via the reasoning_content field so the model can see its own chain-of-thought across turns.',
        showRawOutputSwitch:
                'Show toggle button to display messages as plain text instead of Markdown-formatted content',
        keepStatsVisible: 'Keep processing statistics visible after generation finishes.',
index 12091035781556dc1992b72e7b5972190b5ad256..c8b4b503a6c4ff6e17d11bb5aa552797910d773a 100644 (file)
@@ -54,6 +54,7 @@ export const SETTINGS_KEYS = {
        SHOW_TOOL_CALL_IN_PROGRESS: 'showToolCallInProgress',
        // Developer
        DISABLE_REASONING_PARSING: 'disableReasoningParsing',
+       EXCLUDE_REASONING_FROM_CONTEXT: 'excludeReasoningFromContext',
        SHOW_RAW_OUTPUT_SWITCH: 'showRawOutputSwitch',
        CUSTOM: 'custom'
 } as const;
index 80dc1800c707fe15ab429b85ddc552177e4be99d..1403b7c54eb4acb135aab4c623eeb66530aa7020 100644 (file)
@@ -57,6 +57,46 @@ export class ChatService {
         *
         */
 
+       /**
+        * Extracts reasoning text from content that contains internal reasoning tags.
+        * Returns the concatenated reasoning content or undefined if none found.
+        */
+       private static extractReasoningFromContent(
+               content: ApiChatMessageData['content'] | null | undefined
+       ): string | undefined {
+               if (!content) return undefined;
+
+               const extractFromString = (text: string): string => {
+                       const parts: string[] = [];
+                       // Use a fresh regex instance to avoid shared lastIndex state
+                       const re = new RegExp(AGENTIC_REGEX.REASONING_EXTRACT.source);
+                       let match = re.exec(text);
+                       while (match) {
+                               parts.push(match[1]);
+                               // advance past the matched portion and retry
+                               text = text.slice(match.index + match[0].length);
+                               match = re.exec(text);
+                       }
+                       return parts.join('');
+               };
+
+               if (typeof content === 'string') {
+                       const result = extractFromString(content);
+                       return result || undefined;
+               }
+
+               if (!Array.isArray(content)) return undefined;
+
+               const parts: string[] = [];
+               for (const part of content) {
+                       if (part.type === ContentPartType.TEXT && part.text) {
+                               const result = extractFromString(part.text);
+                               if (result) parts.push(result);
+                       }
+               }
+               return parts.length > 0 ? parts.join('') : undefined;
+       }
+
        /**
         * Sends a chat completion request to the llama.cpp server.
         * Supports both streaming and non-streaming responses with comprehensive parameter configuration.
@@ -111,7 +151,8 @@ export class ChatService {
                        custom,
                        timings_per_token,
                        // Config options
-                       disableReasoningParsing
+                       disableReasoningParsing,
+                       excludeReasoningFromContext
                } = options;
 
                const normalizedMessages: ApiChatMessageData[] = messages
@@ -159,14 +200,24 @@ export class ChatService {
                }
 
                const requestBody: ApiChatCompletionRequest = {
-                       messages: normalizedMessages.map((msg: ApiChatMessageData) => ({
-                               role: msg.role,
-                               // Strip reasoning tags/content from the prompt to avoid polluting KV cache.
-                               // TODO: investigate backend expectations for reasoning tags and add a toggle if needed.
-                               content: ChatService.stripReasoningContent(msg.content),
-                               tool_calls: msg.tool_calls,
-                               tool_call_id: msg.tool_call_id
-                       })),
+                       messages: normalizedMessages.map((msg: ApiChatMessageData) => {
+                               // Always strip internal reasoning/agentic tags from content
+                               const cleanedContent = ChatService.stripReasoningContent(msg.content);
+                               const mapped: ApiChatCompletionRequest['messages'][0] = {
+                                       role: msg.role,
+                                       content: cleanedContent,
+                                       tool_calls: msg.tool_calls,
+                                       tool_call_id: msg.tool_call_id
+                               };
+                               // When preserving reasoning, extract it from raw content and send as separate field
+                               if (!excludeReasoningFromContext) {
+                                       const reasoning = ChatService.extractReasoningFromContent(msg.content);
+                                       if (reasoning) {
+                                               mapped.reasoning_content = reasoning;
+                                       }
+                               }
+                               return mapped;
+                       }),
                        stream,
                        return_progress: stream ? true : undefined,
                        tools: tools && tools.length > 0 ? tools : undefined
index 9a290129eb01e0c59052c41cd963b8be495f3629..cc669212831877dd91561dda1c55c53aadcfa3d1 100644 (file)
@@ -227,6 +227,12 @@ export const SYNCABLE_PARAMETERS: SyncableParameter[] = [
                serverKey: 'alwaysShowAgenticTurns',
                type: SyncableParameterType.BOOLEAN,
                canSync: true
+       },
+       {
+               key: 'excludeReasoningFromContext',
+               serverKey: 'excludeReasoningFromContext',
+               type: SyncableParameterType.BOOLEAN,
+               canSync: true
        }
 ];
 
index e30ec97fe8a9e83ae7b377e42b85f8557031c457..e07f12b36c58a39a326aa1748567e5808428a7ed 100644 (file)
@@ -1479,6 +1479,8 @@ class ChatStore {
 
                if (currentConfig.disableReasoningParsing) apiOptions.disableReasoningParsing = true;
 
+               if (currentConfig.excludeReasoningFromContext) apiOptions.excludeReasoningFromContext = true;
+
                if (hasValue(currentConfig.temperature))
                        apiOptions.temperature = Number(currentConfig.temperature);
 
index f7f876c875dbb75dbe1f3541de9bbff409ed7447..c1a02342357d4df2b612cd94a1d7443cf64c5bdb 100644 (file)
@@ -45,6 +45,7 @@ export interface ApiErrorResponse {
 export interface ApiChatMessageData {
        role: ChatRole;
        content: string | ApiChatMessageContentPart[];
+       reasoning_content?: string;
        tool_calls?: ApiChatCompletionToolCall[];
        tool_call_id?: string;
        timestamp?: number;
@@ -201,6 +202,9 @@ export interface ApiChatCompletionRequest {
        messages: Array<{
                role: ChatRole;
                content: string | ApiChatMessageContentPart[];
+               reasoning_content?: string;
+               tool_calls?: ApiChatCompletionToolCall[];
+               tool_call_id?: string;
        }>;
        stream?: boolean;
        model?: string;
index 360740ab01983bc052f1d786a53a062568b823bf..4c545ce1dc64148e21933612bbdffed8ee14e99c 100644 (file)
@@ -24,6 +24,8 @@ export interface SettingsChatServiceOptions {
        systemMessage?: string;
        // Disable reasoning parsing (use 'none' instead of 'auto')
        disableReasoningParsing?: boolean;
+       // Strip reasoning content from context before sending
+       excludeReasoningFromContext?: boolean;
        tools?: OpenAIToolDefinition[];
        // Generation parameters
        temperature?: number;
diff --git a/tools/server/webui/tests/unit/reasoning-context.test.ts b/tools/server/webui/tests/unit/reasoning-context.test.ts
new file mode 100644 (file)
index 0000000..abbecf7
--- /dev/null
@@ -0,0 +1,196 @@
+import { describe, it, expect } from 'vitest';
+import { AGENTIC_REGEX, REASONING_TAGS } from '$lib/constants/agentic';
+import { ContentPartType } from '$lib/enums';
+
+// Replicate ChatService.extractReasoningFromContent (private static)
+function extractReasoningFromContent(
+       content: string | Array<{ type: string; text?: string }> | null | undefined
+): string | undefined {
+       if (!content) return undefined;
+
+       const extractFromString = (text: string): string => {
+               const parts: string[] = [];
+               const re = new RegExp(AGENTIC_REGEX.REASONING_EXTRACT.source);
+               let match = re.exec(text);
+               while (match) {
+                       parts.push(match[1]);
+                       text = text.slice(match.index + match[0].length);
+                       match = re.exec(text);
+               }
+               return parts.join('');
+       };
+
+       if (typeof content === 'string') {
+               const result = extractFromString(content);
+               return result || undefined;
+       }
+
+       if (!Array.isArray(content)) return undefined;
+
+       const parts: string[] = [];
+       for (const part of content) {
+               if (part.type === ContentPartType.TEXT && part.text) {
+                       const result = extractFromString(part.text);
+                       if (result) parts.push(result);
+               }
+       }
+       return parts.length > 0 ? parts.join('') : undefined;
+}
+
+// Replicate ChatService.stripReasoningContent (private static)
+function stripReasoningContent(
+       content: string | Array<{ type: string; text?: string }> | null | undefined
+): typeof content {
+       if (!content) return content;
+
+       if (typeof content === 'string') {
+               return content
+                       .replace(AGENTIC_REGEX.REASONING_BLOCK, '')
+                       .replace(AGENTIC_REGEX.REASONING_OPEN, '')
+                       .replace(AGENTIC_REGEX.AGENTIC_TOOL_CALL_BLOCK, '')
+                       .replace(AGENTIC_REGEX.AGENTIC_TOOL_CALL_OPEN, '');
+       }
+
+       if (!Array.isArray(content)) return content;
+
+       return content.map((part) => {
+               if (part.type !== ContentPartType.TEXT || !part.text) return part;
+               return {
+                       ...part,
+                       text: part.text
+                               .replace(AGENTIC_REGEX.REASONING_BLOCK, '')
+                               .replace(AGENTIC_REGEX.REASONING_OPEN, '')
+                               .replace(AGENTIC_REGEX.AGENTIC_TOOL_CALL_BLOCK, '')
+                               .replace(AGENTIC_REGEX.AGENTIC_TOOL_CALL_OPEN, '')
+               };
+       });
+}
+
+// Simulate the message mapping logic from ChatService.sendMessage
+function buildApiMessage(
+       content: string,
+       excludeReasoningFromContext: boolean
+): { role: string; content: string; reasoning_content?: string } {
+       const cleaned = stripReasoningContent(content) as string;
+       const mapped: { role: string; content: string; reasoning_content?: string } = {
+               role: 'assistant',
+               content: cleaned
+       };
+       if (!excludeReasoningFromContext) {
+               const reasoning = extractReasoningFromContent(content);
+               if (reasoning) {
+                       mapped.reasoning_content = reasoning;
+               }
+       }
+       return mapped;
+}
+
+// Helper: wrap reasoning the same way the chat store does during streaming
+function wrapReasoning(reasoning: string, content: string): string {
+       return `${REASONING_TAGS.START}${reasoning}${REASONING_TAGS.END}${content}`;
+}
+
+describe('reasoning content extraction', () => {
+       it('extracts reasoning from tagged string content', () => {
+               const input = wrapReasoning('step 1, step 2', 'The answer is 42.');
+               const result = extractReasoningFromContent(input);
+               expect(result).toBe('step 1, step 2');
+       });
+
+       it('returns undefined when no reasoning tags present', () => {
+               expect(extractReasoningFromContent('Just a normal response.')).toBeUndefined();
+       });
+
+       it('returns undefined for null/empty input', () => {
+               expect(extractReasoningFromContent(null)).toBeUndefined();
+               expect(extractReasoningFromContent(undefined)).toBeUndefined();
+               expect(extractReasoningFromContent('')).toBeUndefined();
+       });
+
+       it('extracts reasoning from content part arrays', () => {
+               const input = [
+                       {
+                               type: ContentPartType.TEXT,
+                               text: wrapReasoning('thinking hard', 'result')
+                       }
+               ];
+               expect(extractReasoningFromContent(input)).toBe('thinking hard');
+       });
+
+       it('handles multiple reasoning blocks', () => {
+               const input =
+                       REASONING_TAGS.START +
+                       'block1' +
+                       REASONING_TAGS.END +
+                       'middle' +
+                       REASONING_TAGS.START +
+                       'block2' +
+                       REASONING_TAGS.END +
+                       'end';
+               expect(extractReasoningFromContent(input)).toBe('block1block2');
+       });
+
+       it('ignores non-text content parts', () => {
+               const input = [{ type: 'image_url', text: wrapReasoning('hidden', 'img') }];
+               expect(extractReasoningFromContent(input)).toBeUndefined();
+       });
+});
+
+describe('strip reasoning content', () => {
+       it('removes reasoning tags from string content', () => {
+               const input = wrapReasoning('internal thoughts', 'visible answer');
+               expect(stripReasoningContent(input)).toBe('visible answer');
+       });
+
+       it('removes reasoning from content part arrays', () => {
+               const input = [
+                       {
+                               type: ContentPartType.TEXT,
+                               text: wrapReasoning('thoughts', 'answer')
+                       }
+               ];
+               const result = stripReasoningContent(input) as Array<{ type: string; text?: string }>;
+               expect(result[0].text).toBe('answer');
+       });
+});
+
+describe('API message building with reasoning preservation', () => {
+       const storedContent = wrapReasoning('Let me think: 2+2=4, basic arithmetic.', 'The answer is 4.');
+
+       it('preserves reasoning_content when excludeReasoningFromContext is false', () => {
+               const msg = buildApiMessage(storedContent, false);
+               expect(msg.content).toBe('The answer is 4.');
+               expect(msg.reasoning_content).toBe('Let me think: 2+2=4, basic arithmetic.');
+               // no internal tags leak into either field
+               expect(msg.content).not.toContain('<<<');
+               expect(msg.reasoning_content).not.toContain('<<<');
+       });
+
+       it('strips reasoning_content when excludeReasoningFromContext is true', () => {
+               const msg = buildApiMessage(storedContent, true);
+               expect(msg.content).toBe('The answer is 4.');
+               expect(msg.reasoning_content).toBeUndefined();
+       });
+
+       it('handles content with no reasoning in both modes', () => {
+               const plain = 'No reasoning here.';
+               const msgPreserve = buildApiMessage(plain, false);
+               const msgExclude = buildApiMessage(plain, true);
+               expect(msgPreserve.content).toBe(plain);
+               expect(msgPreserve.reasoning_content).toBeUndefined();
+               expect(msgExclude.content).toBe(plain);
+               expect(msgExclude.reasoning_content).toBeUndefined();
+       });
+
+       it('cleans agentic tool call blocks from content even when preserving reasoning', () => {
+               const input =
+                       wrapReasoning('plan', 'text') +
+                       '\n\n<<<AGENTIC_TOOL_CALL_START>>>\n' +
+                       '<<<TOOL_NAME:bash>>>\n' +
+                       '<<<TOOL_ARGS_START>>>\n{}\n<<<TOOL_ARGS_END>>>\nout\n' +
+                       '<<<AGENTIC_TOOL_CALL_END>>>\n';
+               const msg = buildApiMessage(input, false);
+               expect(msg.content).not.toContain('<<<');
+               expect(msg.reasoning_content).toBe('plan');
+       });
+});