* webui: add OAI-Compat Harmony tool-call live streaming visualization and persistence in chat UI
- Purely visual and diagnostic change, no effect on model context, prompt
construction, or inference behavior
- Captured assistant tool call payloads during streaming and non-streaming
completions, and persisted them in chat state and storage for downstream
use (see the payload sketch after this list)
- Exposed parsed tool call labels beneath the assistant's model info line
with graceful fallback when parsing fails
- Added tool call badges beneath assistant responses that expose JSON tooltips
and copy their payloads when clicked, matching the existing model badge styling
- Added a user-facing setting in the Developer settings section, directly
under the model selector option, to toggle tool call visibility
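
  Illustrative TypeScript sketch (hypothetical tool name, id, and argument
  values): two streamed delta.tool_calls fragments for the same index are
  merged by concatenating function.arguments, then serialized into the
  assistant message's toolCalls field, roughly like this:

    // hypothetical streamed fragments for one tool call (index 0)
    const deltas = [
      {
        index: 0,
        id: 'call_0',
        type: 'function',
        function: { name: 'get_weather', arguments: '{"city":' }
      },
      { index: 0, function: { arguments: '"Paris"}' } }
    ];
    // after merging, persisted on the message as a JSON string, roughly:
    // [{"function":{"name":"get_weather","arguments":"{\"city\":\"Paris\"}"},
    //   "id":"call_0","type":"function"}]
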
* webui: remove scroll listener causing unnecessary layout updates (model selector)
* Update tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte
Co-authored-by: Aleksander Grygier <redacted>
* Update tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte
Co-authored-by: Aleksander Grygier <redacted>
* chore: npm run format & update webui build output
* chore: update webui build output
---------
Co-authored-by: Aleksander Grygier <redacted>
}
}
- function handleScroll() {
- if (isOpen) {
- updateMenuPosition();
- }
- }
-
async function handleSelect(value: string | undefined) {
if (!value) return;
}
</script>
-<svelte:window onresize={handleResize} onscroll={handleScroll} />
+<svelte:window onresize={handleResize} />
<svelte:document onpointerdown={handlePointerDown} onkeydown={handleKeydown} />
import { getDeletionInfo } from '$lib/stores/chat.svelte';
import { copyToClipboard } from '$lib/utils/copy';
import { isIMEComposing } from '$lib/utils/is-ime-composing';
+ import type { ApiChatCompletionToolCall } from '$lib/types/api';
import ChatMessageAssistant from './ChatMessageAssistant.svelte';
import ChatMessageUser from './ChatMessageUser.svelte';
return null;
});
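+ // Tool calls for assistant messages: a parsed array when the stored JSON is
+ // valid, the raw string as a visible fallback otherwise, null when absent.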
+ let toolCallContent = $derived.by((): ApiChatCompletionToolCall[] | string | null => {
+ if (message.role === 'assistant') {
+ const trimmedToolCalls = message.toolCalls?.trim();
+
+ if (!trimmedToolCalls) {
+ return null;
+ }
+
+ try {
+ const parsed = JSON.parse(trimmedToolCalls);
+
+ if (Array.isArray(parsed)) {
+ return parsed as ApiChatCompletionToolCall[];
+ }
+ } catch {
+ // Not valid JSON (e.g. raw Harmony tool-call text): fall back to the raw string so issues surface visibly.
+ }
+
+ return trimmedToolCalls;
+ }
+ return null;
+ });
+
function handleCancelEdit() {
isEditing = false;
editedContent = message.content;
{showDeleteDialog}
{siblingInfo}
{thinkingContent}
+ {toolCallContent}
/>
{/if}
Gauge,
Clock,
WholeWord,
- ChartNoAxesColumn
+ ChartNoAxesColumn,
+ Wrench
} from '@lucide/svelte';
import { Button } from '$lib/components/ui/button';
import { Checkbox } from '$lib/components/ui/checkbox';
import { config } from '$lib/stores/settings.svelte';
import { modelName as serverModelName } from '$lib/stores/server.svelte';
import { copyToClipboard } from '$lib/utils/copy';
+ import type { ApiChatCompletionToolCall } from '$lib/types/api';
interface Props {
class?: string;
siblingInfo?: ChatMessageSiblingInfo | null;
textareaElement?: HTMLTextAreaElement;
thinkingContent: string | null;
+ toolCallContent: ApiChatCompletionToolCall[] | string | null;
}
let {
shouldBranchAfterEdit = false,
siblingInfo = null,
textareaElement = $bindable(),
- thinkingContent
+ thinkingContent,
+ toolCallContent = null
}: Props = $props();
+ const toolCalls = $derived(
+ Array.isArray(toolCallContent) ? (toolCallContent as ApiChatCompletionToolCall[]) : null
+ );
+ const fallbackToolCalls = $derived(typeof toolCallContent === 'string' ? toolCallContent : null);
+
const processingState = useProcessingState();
let currentConfig = $derived(config());
let serverModel = $derived(serverModelName());
void copyToClipboard(model ?? '');
}
+
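+ // Builds the badge label and a pretty-printed JSON payload that backs both
+ // the hover tooltip and the copy-to-clipboard action.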
+ function formatToolCallBadge(toolCall: ApiChatCompletionToolCall, index: number) {
+ const callNumber = index + 1;
+ const functionName = toolCall.function?.name?.trim();
+ const label = functionName || `Call #${callNumber}`;
+
+ const payload: Record<string, unknown> = {};
+
+ const id = toolCall.id?.trim();
+ if (id) {
+ payload.id = id;
+ }
+
+ const type = toolCall.type?.trim();
+ if (type) {
+ payload.type = type;
+ }
+
+ if (toolCall.function) {
+ const fnPayload: Record<string, unknown> = {};
+
+ const name = toolCall.function.name?.trim();
+ if (name) {
+ fnPayload.name = name;
+ }
+
+ const rawArguments = toolCall.function.arguments?.trim();
+ if (rawArguments) {
+ try {
+ fnPayload.arguments = JSON.parse(rawArguments);
+ } catch {
+ fnPayload.arguments = rawArguments;
+ }
+ }
+
+ if (Object.keys(fnPayload).length > 0) {
+ payload.function = fnPayload;
+ }
+ }
+
+ const formattedPayload = JSON.stringify(payload, null, 2);
+
+ return {
+ label,
+ tooltip: formattedPayload,
+ copyValue: formattedPayload
+ };
+ }
+
+ function handleCopyToolCall(payload: string) {
+ void copyToClipboard(payload, 'Tool call copied to clipboard');
+ }
</script>
<div
</span>
{/if}
+ {#if currentConfig.showToolCalls}
+ {#if (toolCalls && toolCalls.length > 0) || fallbackToolCalls}
+ <span class="inline-flex flex-wrap items-center gap-2 text-xs text-muted-foreground">
+ <span class="inline-flex items-center gap-1">
+ <Wrench class="h-3.5 w-3.5" />
+
+ <span>Tool calls:</span>
+ </span>
+
+ {#if toolCalls && toolCalls.length > 0}
+ {#each toolCalls as toolCall, index (toolCall.id ?? `${index}`)}
+ {@const badge = formatToolCallBadge(toolCall, index)}
+ <button
+ type="button"
+ class="tool-call-badge inline-flex cursor-pointer items-center gap-1 rounded-sm bg-muted-foreground/15 px-1.5 py-0.75"
+ title={badge.tooltip}
+ aria-label={`Copy tool call ${badge.label}`}
+ onclick={() => handleCopyToolCall(badge.copyValue)}
+ >
+ {badge.label}
+
+ <Copy class="ml-1 h-3 w-3" />
+ </button>
+ {/each}
+ {:else if fallbackToolCalls}
+ <button
+ type="button"
+ class="tool-call-badge tool-call-badge--fallback inline-flex cursor-pointer items-center gap-1 rounded-sm bg-muted-foreground/15 px-1.5 py-0.75"
+ title={fallbackToolCalls}
+ aria-label="Copy tool call payload"
+ onclick={() => handleCopyToolCall(fallbackToolCalls)}
+ >
+ {fallbackToolCalls}
+
+ <Copy class="ml-1 h-3 w-3" />
+ </button>
+ {/if}
+ </span>
+ {/if}
+ {/if}
+
{#if currentConfig.showMessageStats && message.timings && message.timings.predicted_n && message.timings.predicted_ms}
{@const tokensPerSecond = (message.timings.predicted_n / message.timings.predicted_ms) * 1000}
<span class="inline-flex items-center gap-2 text-xs text-muted-foreground">
white-space: pre-wrap;
word-break: break-word;
}
+
+ .tool-call-badge {
+ max-width: 12rem;
+ white-space: nowrap;
+ overflow: hidden;
+ text-overflow: ellipsis;
+ }
+
+ .tool-call-badge--fallback {
+ max-width: 20rem;
+ white-space: normal;
+ word-break: break-word;
+ }
</style>
label: 'Enable model selector',
type: 'checkbox'
},
+ {
+ key: 'showToolCalls',
+ label: 'Show tool call labels',
+ type: 'checkbox'
+ },
{
key: 'disableReasoningFormat',
label: 'Show raw LLM output',
theme: 'system',
showTokensPerSecond: false,
showThoughtInProgress: false,
+ showToolCalls: false,
disableReasoningFormat: false,
keepStatsVisible: false,
showMessageStats: true,
custom: 'Custom JSON parameters to send to the API. Must be valid JSON format.',
showTokensPerSecond: 'Display generation speed in tokens per second during streaming.',
showThoughtInProgress: 'Expand thought process by default when generating messages.',
+ showToolCalls:
+ 'Display tool call labels and payloads from Harmony-compatible delta.tool_calls data below assistant messages.',
disableReasoningFormat:
'Show raw LLM output without backend parsing and frontend Markdown rendering to inspect streaming across different models.',
keepStatsVisible: 'Keep processing statistics visible after generation finishes.',
import { config } from '$lib/stores/settings.svelte';
import { selectedModelName } from '$lib/stores/models.svelte';
import { slotsService } from './slots';
+import type {
+ ApiChatCompletionRequest,
+ ApiChatCompletionResponse,
+ ApiChatCompletionStreamChunk,
+ ApiChatCompletionToolCall,
+ ApiChatCompletionToolCallDelta,
+ ApiChatMessageData
+} from '$lib/types/api';
+import type {
+ DatabaseMessage,
+ DatabaseMessageExtra,
+ DatabaseMessageExtraAudioFile,
+ DatabaseMessageExtraImageFile,
+ DatabaseMessageExtraLegacyContext,
+ DatabaseMessageExtraPdfFile,
+ DatabaseMessageExtraTextFile
+} from '$lib/types/database';
+import type { ChatMessagePromptProgress, ChatMessageTimings } from '$lib/types/chat';
+import type { SettingsChatServiceOptions } from '$lib/types/settings';
/**
* ChatService - Low-level API communication layer for llama.cpp server interactions
*
onComplete,
onError,
onReasoningChunk,
+ onToolCallChunk,
onModel,
onFirstValidChunk,
// Generation parameters
onComplete,
onError,
onReasoningChunk,
+ onToolCallChunk,
onModel,
onFirstValidChunk,
conversationId,
);
return;
} else {
- return this.handleNonStreamResponse(response, onComplete, onError, onModel);
+ return this.handleNonStreamResponse(
+ response,
+ onComplete,
+ onError,
+ onToolCallChunk,
+ onModel
+ );
}
} catch (error) {
if (error instanceof Error && error.name === 'AbortError') {
onComplete?: (
response: string,
reasoningContent?: string,
- timings?: ChatMessageTimings
+ timings?: ChatMessageTimings,
+ toolCalls?: string
) => void,
onError?: (error: Error) => void,
onReasoningChunk?: (chunk: string) => void,
+ onToolCallChunk?: (chunk: string) => void,
onModel?: (model: string) => void,
onFirstValidChunk?: () => void,
conversationId?: string,
const decoder = new TextDecoder();
let aggregatedContent = '';
let fullReasoningContent = '';
+ let aggregatedToolCalls: ApiChatCompletionToolCall[] = [];
let hasReceivedData = false;
let lastTimings: ChatMessageTimings | undefined;
let streamFinished = false;
let modelEmitted = false;
let firstValidChunkEmitted = false;
+ let toolCallIndexOffset = 0;
+ let hasOpenToolCallBatch = false;
+
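+ // Tool call deltas that arrive back-to-back form one batch. Once regular
+ // content or reasoning streams again the batch is closed, so later deltas are
+ // appended after the calls aggregated so far (toolCallIndexOffset shifts them).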
+ const finalizeOpenToolCallBatch = () => {
+ if (!hasOpenToolCallBatch) {
+ return;
+ }
+
+ toolCallIndexOffset = aggregatedToolCalls.length;
+ hasOpenToolCallBatch = false;
+ };
+
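+ // Merges a delta batch into the aggregate and notifies the UI with the
+ // serialized snapshot so tool call badges update live while arguments stream.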
+ const processToolCallDelta = (toolCalls?: ApiChatCompletionToolCallDelta[]) => {
+ if (!toolCalls || toolCalls.length === 0) {
+ return;
+ }
+
+ aggregatedToolCalls = this.mergeToolCallDeltas(
+ aggregatedToolCalls,
+ toolCalls,
+ toolCallIndexOffset
+ );
+
+ if (aggregatedToolCalls.length === 0) {
+ return;
+ }
+
+ hasOpenToolCallBatch = true;
+
+ const serializedToolCalls = JSON.stringify(aggregatedToolCalls);
+
+ if (!serializedToolCalls) {
+ return;
+ }
+
+ hasReceivedData = true;
+
+ if (!abortSignal?.aborted) {
+ onToolCallChunk?.(serializedToolCalls);
+ }
+ };
try {
let chunk = '';
const content = parsed.choices[0]?.delta?.content;
const reasoningContent = parsed.choices[0]?.delta?.reasoning_content;
+ const toolCalls = parsed.choices[0]?.delta?.tool_calls;
const timings = parsed.timings;
const promptProgress = parsed.prompt_progress;
}
if (content) {
+ finalizeOpenToolCallBatch();
hasReceivedData = true;
aggregatedContent += content;
if (!abortSignal?.aborted) {
}
if (reasoningContent) {
+ finalizeOpenToolCallBatch();
hasReceivedData = true;
fullReasoningContent += reasoningContent;
if (!abortSignal?.aborted) {
onReasoningChunk?.(reasoningContent);
}
}
+
+ processToolCallDelta(toolCalls);
} catch (e) {
console.error('Error parsing JSON chunk:', e);
}
if (abortSignal?.aborted) return;
if (streamFinished) {
- if (!hasReceivedData && aggregatedContent.length === 0) {
+ finalizeOpenToolCallBatch();
+
+ if (
+ !hasReceivedData &&
+ aggregatedContent.length === 0 &&
+ aggregatedToolCalls.length === 0
+ ) {
const noResponseError = new Error('No response received from server. Please try again.');
throw noResponseError;
}
- onComplete?.(aggregatedContent, fullReasoningContent || undefined, lastTimings);
+ const finalToolCalls =
+ aggregatedToolCalls.length > 0 ? JSON.stringify(aggregatedToolCalls) : undefined;
+
+ onComplete?.(
+ aggregatedContent,
+ fullReasoningContent || undefined,
+ lastTimings,
+ finalToolCalls
+ );
}
} catch (error) {
const err = error instanceof Error ? error : new Error('Stream error');
}
}
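+ /**
+  * Merges streamed tool call deltas into the accumulated list, matching
+  * entries by index (shifted by indexOffset) and concatenating partial
+  * function.arguments fragments into complete call payloads.
+  */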
+ private mergeToolCallDeltas(
+ existing: ApiChatCompletionToolCall[],
+ deltas: ApiChatCompletionToolCallDelta[],
+ indexOffset = 0
+ ): ApiChatCompletionToolCall[] {
+ const result = existing.map((call) => ({
+ ...call,
+ function: call.function ? { ...call.function } : undefined
+ }));
+
+ for (const delta of deltas) {
+ const index =
+ typeof delta.index === 'number' && delta.index >= 0
+ ? delta.index + indexOffset
+ : result.length;
+
+ while (result.length <= index) {
+ result.push({ function: undefined });
+ }
+
+ const target = result[index]!;
+
+ if (delta.id) {
+ target.id = delta.id;
+ }
+
+ if (delta.type) {
+ target.type = delta.type;
+ }
+
+ if (delta.function) {
+ const fn = target.function ? { ...target.function } : {};
+
+ if (delta.function.name) {
+ fn.name = delta.function.name;
+ }
+
+ if (delta.function.arguments) {
+ fn.arguments = (fn.arguments ?? '') + delta.function.arguments;
+ }
+
+ target.function = fn;
+ }
+ }
+
+ return result;
+ }
+
/**
* Handles non-streaming response from the chat completion API.
* Parses the JSON response and extracts the generated content.
onComplete?: (
response: string,
reasoningContent?: string,
- timings?: ChatMessageTimings
+ timings?: ChatMessageTimings,
+ toolCalls?: string
) => void,
onError?: (error: Error) => void,
+ onToolCallChunk?: (chunk: string) => void,
onModel?: (model: string) => void
): Promise<string> {
try {
const content = data.choices[0]?.message?.content || '';
const reasoningContent = data.choices[0]?.message?.reasoning_content;
+ const toolCalls = data.choices[0]?.message?.tool_calls;
if (reasoningContent) {
console.log('Full reasoning content:', reasoningContent);
}
- if (!content.trim()) {
+ let serializedToolCalls: string | undefined;
+
+ if (toolCalls && toolCalls.length > 0) {
+ const mergedToolCalls = this.mergeToolCallDeltas([], toolCalls);
+
+ if (mergedToolCalls.length > 0) {
+ serializedToolCalls = JSON.stringify(mergedToolCalls);
+ if (serializedToolCalls) {
+ onToolCallChunk?.(serializedToolCalls);
+ }
+ }
+ }
+
+ if (!content.trim() && !serializedToolCalls) {
const noResponseError = new Error('No response received from server. Please try again.');
throw noResponseError;
}
- onComplete?.(content, reasoningContent);
+ onComplete?.(content, reasoningContent, undefined, serializedToolCalls);
return content;
} catch (error) {
type,
timestamp: Date.now(),
thinking: '',
+ toolCalls: '',
children: [],
extra: extras
},
): Promise<void> {
let streamedContent = '';
let streamedReasoningContent = '';
+ let streamedToolCallContent = '';
let resolvedModel: string | null = null;
let modelPersisted = false;
this.updateMessageAtIndex(messageIndex, { thinking: streamedReasoningContent });
},
+ onToolCallChunk: (toolCallChunk: string) => {
+ const chunk = toolCallChunk.trim();
+
+ if (!chunk) {
+ return;
+ }
+
+ streamedToolCallContent = chunk;
+
+ const messageIndex = this.findMessageIndex(assistantMessage.id);
+
+ this.updateMessageAtIndex(messageIndex, { toolCalls: streamedToolCallContent });
+ },
+
onModel: (modelName: string) => {
recordModel(modelName);
},
onComplete: async (
finalContent?: string,
reasoningContent?: string,
- timings?: ChatMessageTimings
+ timings?: ChatMessageTimings,
+ toolCallContent?: string
) => {
slotsService.stopStreaming();
const updateData: {
content: string;
thinking: string;
+ toolCalls: string;
timings?: ChatMessageTimings;
model?: string;
} = {
content: finalContent || streamedContent,
thinking: reasoningContent || streamedReasoningContent,
+ toolCalls: toolCallContent || streamedToolCallContent,
timings: timings
};
const messageIndex = this.findMessageIndex(assistantMessage.id);
- const localUpdateData: { timings?: ChatMessageTimings; model?: string } = {
+ const localUpdateData: {
+ timings?: ChatMessageTimings;
+ model?: string;
+ toolCalls?: string;
+ } = {
timings: timings
};
localUpdateData.model = updateData.model;
}
+ if (updateData.toolCalls !== undefined) {
+ localUpdateData.toolCalls = updateData.toolCalls;
+ }
+
this.updateMessageAtIndex(messageIndex, localUpdateData);
await DatabaseStore.updateCurrentNode(assistantMessage.convId, assistantMessage.id);
content: '',
timestamp: Date.now(),
thinking: '',
+ toolCalls: '',
children: [],
model: null
},
role: messageToEdit.role,
content: newContent,
thinking: messageToEdit.thinking || '',
+ toolCalls: messageToEdit.toolCalls || '',
children: [],
model: messageToEdit.model // Preserve original model info when branching
},
role: messageToEdit.role,
content: newContent,
thinking: messageToEdit.thinking || '',
+ toolCalls: messageToEdit.toolCalls || '',
children: [],
extra: messageToEdit.extra ? JSON.parse(JSON.stringify(messageToEdit.extra)) : undefined,
model: messageToEdit.model // Preserve original model info when branching
role: 'assistant',
content: '',
thinking: '',
+ toolCalls: '',
children: [],
model: null
},
role: 'assistant',
content: '',
thinking: '',
+ toolCalls: '',
children: [],
model: null
},
...message,
id: uuid(),
parent: parentId,
+ toolCalls: message.toolCalls ?? '',
children: []
};
content: '',
parent: null,
thinking: '',
+ toolCalls: '',
children: []
};
samplers?: string[];
// Custom parameters (JSON string)
custom?: Record<string, unknown>;
+ timings_per_token?: boolean;
+}
+
+export interface ApiChatCompletionToolCallFunctionDelta {
+ name?: string;
+ arguments?: string;
+}
+
+export interface ApiChatCompletionToolCallDelta {
+ index?: number;
+ id?: string;
+ type?: string;
+ function?: ApiChatCompletionToolCallFunctionDelta;
+}
+
+export interface ApiChatCompletionToolCall extends ApiChatCompletionToolCallDelta {
+ function?: ApiChatCompletionToolCallFunctionDelta & { arguments?: string };
}
export interface ApiChatCompletionStreamChunk {
content?: string;
reasoning_content?: string;
model?: string;
+ tool_calls?: ApiChatCompletionToolCallDelta[];
};
}>;
timings?: {
content: string;
reasoning_content?: string;
model?: string;
+ tool_calls?: ApiChatCompletionToolCallDelta[];
};
}>;
}
content: string;
parent: string;
thinking: string;
+ toolCalls?: string;
children: string[];
extra?: DatabaseMessageExtra[];
timings?: ChatMessageTimings;
samplers?: string | string[];
// Custom parameters
custom?: string;
+ timings_per_token?: boolean;
// Callbacks
onChunk?: (chunk: string) => void;
onReasoningChunk?: (chunk: string) => void;
+ onToolCallChunk?: (chunk: string) => void;
onModel?: (model: string) => void;
onFirstValidChunk?: () => void;
- onComplete?: (response: string, reasoningContent?: string, timings?: ChatMessageTimings) => void;
+ onComplete?: (
+ response: string,
+ reasoningContent?: string,
+ timings?: ChatMessageTimings,
+ toolCalls?: string
+ ) => void;
onError?: (error: Error) => void;
}