From: Pascal Date: Thu, 18 Dec 2025 16:55:03 +0000 (+0100) Subject: webui: display prompt processing stats (#18146) X-Git-Tag: upstream/0.0.7599~125 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=f9ec8858edea4a0ecfea149d6815ebfb5ecc3bcd;p=pkg%2Fggml%2Fsources%2Fllama.cpp webui: display prompt processing stats (#18146) * webui: display prompt processing stats * feat: Improve UI of Chat Message Statistics * chore: update webui build output * refactor: Post-review improvements * chore: update webui build output --------- Co-authored-by: Aleksander Grygier --- diff --git a/tools/server/public/index.html.gz b/tools/server/public/index.html.gz index a1d62273..9e44f032 100644 Binary files a/tools/server/public/index.html.gz and b/tools/server/public/index.html.gz differ diff --git a/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte index 2c9a012e..8997963f 100644 --- a/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte +++ b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte @@ -244,7 +244,7 @@
{#if displayedModel()} - +
{#if isRouter} {/if} - +
{/if} {#if config().showToolCalls} diff --git a/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageStatistics.svelte b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageStatistics.svelte index a453a310..a39acb1d 100644 --- a/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageStatistics.svelte +++ b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageStatistics.svelte @@ -1,20 +1,122 @@ - + let promptTokensPerSecond = $derived( + promptTokens !== undefined && promptMs !== undefined + ? (promptTokens / promptMs) * 1000 + : undefined + ); + + let promptTimeInSeconds = $derived( + promptMs !== undefined ? (promptMs / 1000).toFixed(2) : undefined + ); + + let hasPromptStats = $derived( + promptTokens !== undefined && + promptMs !== undefined && + promptTokensPerSecond !== undefined && + promptTimeInSeconds !== undefined + ); + - +
+
+ {#if hasPromptStats} + + + + + +

Reading (prompt processing)

+
+
+ {/if} + + + + + +

Generation (token output)

+
+
+
- +
+ {#if activeView === ChatMessageStatsView.GENERATION} + + + + {:else if hasPromptStats} + + + + {/if} +
+
diff --git a/tools/server/webui/src/lib/components/app/misc/BadgeChatStatistic.svelte b/tools/server/webui/src/lib/components/app/misc/BadgeChatStatistic.svelte index 9e5339ca..a2b28d20 100644 --- a/tools/server/webui/src/lib/components/app/misc/BadgeChatStatistic.svelte +++ b/tools/server/webui/src/lib/components/app/misc/BadgeChatStatistic.svelte @@ -1,5 +1,6 @@ - - {#snippet icon()} - - {/snippet} +{#if tooltipLabel} + + + + {#snippet icon()} + + {/snippet} - {value} - + {value} + + + +

{tooltipLabel}

+
+ +{:else} + + {#snippet icon()} + + {/snippet} + + {value} + +{/if} diff --git a/tools/server/webui/src/lib/enums/chat.ts b/tools/server/webui/src/lib/enums/chat.ts new file mode 100644 index 00000000..2b9eb7bc --- /dev/null +++ b/tools/server/webui/src/lib/enums/chat.ts @@ -0,0 +1,4 @@ +export enum ChatMessageStatsView { + GENERATION = 'generation', + READING = 'reading' +} diff --git a/tools/server/webui/src/lib/enums/index.ts b/tools/server/webui/src/lib/enums/index.ts index d9e90014..83c86caf 100644 --- a/tools/server/webui/src/lib/enums/index.ts +++ b/tools/server/webui/src/lib/enums/index.ts @@ -1,5 +1,7 @@ export { AttachmentType } from './attachment'; +export { ChatMessageStatsView } from './chat'; + export { FileTypeCategory, FileTypeImage, diff --git a/tools/server/webui/src/lib/stores/chat.svelte.ts b/tools/server/webui/src/lib/stores/chat.svelte.ts index 4f78840a..e0431ee6 100644 --- a/tools/server/webui/src/lib/stores/chat.svelte.ts +++ b/tools/server/webui/src/lib/stores/chat.svelte.ts @@ -171,6 +171,7 @@ class ChatStore { updateProcessingStateFromTimings( timingData: { prompt_n: number; + prompt_ms?: number; predicted_n: number; predicted_per_second: number; cache_n: number; @@ -212,6 +213,7 @@ class ChatStore { if (message.role === 'assistant' && message.timings) { const restoredState = this.parseTimingData({ prompt_n: message.timings.prompt_n || 0, + prompt_ms: message.timings.prompt_ms, predicted_n: message.timings.predicted_n || 0, predicted_per_second: message.timings.predicted_n && message.timings.predicted_ms @@ -282,6 +284,7 @@ class ChatStore { private parseTimingData(timingData: Record): ApiProcessingState | null { const promptTokens = (timingData.prompt_n as number) || 0; + const promptMs = (timingData.prompt_ms as number) || undefined; const predictedTokens = (timingData.predicted_n as number) || 0; const tokensPerSecond = (timingData.predicted_per_second as number) || 0; const cacheTokens = (timingData.cache_n as number) || 0; @@ -320,6 +323,7 @@ class ChatStore { speculative: false, progressPercent, promptTokens, + promptMs, cacheTokens }; } @@ -536,6 +540,7 @@ class ChatStore { this.updateProcessingStateFromTimings( { prompt_n: timings?.prompt_n || 0, + prompt_ms: timings?.prompt_ms, predicted_n: timings?.predicted_n || 0, predicted_per_second: tokensPerSecond, cache_n: timings?.cache_n || 0, @@ -768,10 +773,11 @@ class ChatStore { content: streamingState.response }; if (lastMessage.thinking?.trim()) updateData.thinking = lastMessage.thinking; - const lastKnownState = this.getCurrentProcessingStateSync(); + const lastKnownState = this.getProcessingState(conversationId); if (lastKnownState) { updateData.timings = { prompt_n: lastKnownState.promptTokens || 0, + prompt_ms: lastKnownState.promptMs, predicted_n: lastKnownState.tokensDecoded || 0, cache_n: lastKnownState.cacheTokens || 0, predicted_ms: @@ -1253,6 +1259,7 @@ class ChatStore { this.updateProcessingStateFromTimings( { prompt_n: timings?.prompt_n || 0, + prompt_ms: timings?.prompt_ms, predicted_n: timings?.predicted_n || 0, predicted_per_second: tokensPerSecond, cache_n: timings?.cache_n || 0, diff --git a/tools/server/webui/src/lib/types/api.d.ts b/tools/server/webui/src/lib/types/api.d.ts index c3f47077..e5fde24c 100644 --- a/tools/server/webui/src/lib/types/api.d.ts +++ b/tools/server/webui/src/lib/types/api.d.ts @@ -342,6 +342,7 @@ export interface ApiProcessingState { // Progress information from prompt_progress progressPercent?: number; promptTokens?: number; + promptMs?: number; cacheTokens?: number; }