git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
Add a setting to display message generation statistics (#16901)
author: Aleksander Grygier <redacted>
Sat, 1 Nov 2025 14:35:57 +0000 (15:35 +0100)
committer: GitHub <redacted>
Sat, 1 Nov 2025 14:35:57 +0000 (15:35 +0100)
* feat: Add setting to display message generation statistics

* chore: build static webui output

tools/server/public/index.html.gz
tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte
tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettingsDialog.svelte
tools/server/webui/src/lib/constants/settings-config.ts

index 29579e08c251298048f842eafac774166f23d516..a81bae04d1983d992a4b46da93153d1cfa63c704 100644 (file)
Binary files a/tools/server/public/index.html.gz and b/tools/server/public/index.html.gz differ
index e878e7bf8a21763e26858f171d998fddfb635fcb..41774c6f874ae3db367a88e9ad22d1b65448d2dd 100644 (file)
@@ -3,7 +3,16 @@
        import { useProcessingState } from '$lib/hooks/use-processing-state.svelte';
        import { isLoading } from '$lib/stores/chat.svelte';
        import { fade } from 'svelte/transition';
-       import { Check, Copy, Package, X } from '@lucide/svelte';
+       import {
+               Check,
+               Copy,
+               Package,
+               X,
+               Gauge,
+               Clock,
+               WholeWord,
+               ChartNoAxesColumn
+       } from '@lucide/svelte';
        import { Button } from '$lib/components/ui/button';
        import { Checkbox } from '$lib/components/ui/checkbox';
        import { INPUT_CLASSES } from '$lib/constants/input-classes';
                </div>
        {/if}
 
-       {#if displayedModel()}
-               <span class="mt-6 mb-4 inline-flex items-center gap-1 text-xs text-muted-foreground">
-                       <Package class="h-3.5 w-3.5" />
+       <div class="info my-6 grid gap-4">
+               {#if displayedModel()}
+                       <span class="inline-flex items-center gap-2 text-xs text-muted-foreground">
+                               <span class="inline-flex items-center gap-1">
+                                       <Package class="h-3.5 w-3.5" />
 
-                       <span>Model used:</span>
+                                       <span>Model used:</span>
+                               </span>
 
-                       <button
-                               class="inline-flex cursor-pointer items-center gap-1 rounded-sm bg-muted-foreground/15 px-1.5 py-0.75"
-                               onclick={handleCopyModel}
-                       >
-                               {displayedModel()}
+                               <button
+                                       class="inline-flex cursor-pointer items-center gap-1 rounded-sm bg-muted-foreground/15 px-1.5 py-0.75"
+                                       onclick={handleCopyModel}
+                               >
+                                       {displayedModel()}
 
-                               <Copy class="ml-1 h-3 w-3 " />
-                       </button>
-               </span>
-       {/if}
+                                       <Copy class="ml-1 h-3 w-3 " />
+                               </button>
+                       </span>
+               {/if}
+
+               {#if currentConfig.showMessageStats && message.timings && message.timings.predicted_n && message.timings.predicted_ms}
+                       {@const tokensPerSecond = (message.timings.predicted_n / message.timings.predicted_ms) * 1000}
+                       <span class="inline-flex items-center gap-2 text-xs text-muted-foreground">
+                               <span class="inline-flex items-center gap-1">
+                                       <ChartNoAxesColumn class="h-3.5 w-3.5" />
+
+                                       <span>Statistics:</span>
+                               </span>
+
+                               <div class="inline-flex flex-wrap items-center gap-2 text-xs text-muted-foreground">
+                                       <span
+                                               class="inline-flex items-center gap-1 rounded-sm bg-muted-foreground/15 px-1.5 py-0.75"
+                                       >
+                                               <Gauge class="h-3 w-3" />
+                                               {tokensPerSecond.toFixed(2)} tokens/s
+                                       </span>
+                                       <span
+                                               class="inline-flex items-center gap-1 rounded-sm bg-muted-foreground/15 px-1.5 py-0.75"
+                                       >
+                                               <WholeWord class="h-3 w-3" />
+                                               {message.timings.predicted_n} tokens
+                                       </span>
+                                       <span
+                                               class="inline-flex items-center gap-1 rounded-sm bg-muted-foreground/15 px-1.5 py-0.75"
+                                       >
+                                               <Clock class="h-3 w-3" />
+                                               {(message.timings.predicted_ms / 1000).toFixed(2)}s
+                                       </span>
+                               </div>
+                       </span>
+               {/if}
+       </div>
 
        {#if message.timestamp && !isEditing}
                <ChatMessageActions
index 20e4d3b3324e8df59b0608323b3c9a20a21d507d..e4672b787ee89f9cb9ef8a712667639c3ae9016e 100644 (file)
                                                { value: 'dark', label: 'Dark', icon: Moon }
                                        ]
                                },
+                               {
+                                       key: 'showMessageStats',
+                                       label: 'Show message generation statistics',
+                                       type: 'checkbox'
+                               },
                                {
                                        key: 'showTokensPerSecond',
                                        label: 'Show tokens per second',
index 512dcc96997e73393200fac35752a66f1479d183..c25f380846cf435308fe5504a79d9b6dedcbb2d6 100644 (file)
@@ -8,6 +8,7 @@ export const SETTING_CONFIG_DEFAULT: Record<string, string | number | boolean> =
        showThoughtInProgress: false,
        disableReasoningFormat: false,
        keepStatsVisible: false,
+       showMessageStats: true,
        askForTitleConfirmation: false,
        pasteLongTextToFileLen: 2500,
        pdfAsImage: false,
@@ -82,6 +83,8 @@ export const SETTING_CONFIG_INFO: Record<string, string> = {
        disableReasoningFormat:
                'Show raw LLM output without backend parsing and frontend Markdown rendering to inspect streaming across different models.',
        keepStatsVisible: 'Keep processing statistics visible after generation finishes.',
+       showMessageStats:
+               'Display generation statistics (tokens/second, token count, duration) below each assistant message.',
        askForTitleConfirmation:
                'Ask for confirmation before automatically changing conversation title when editing the first message.',
        pdfAsImage: 'Parse PDF as image instead of text (requires vision-capable model).',