git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
Webui/prompt processing progress (#18300)
author Pascal <redacted>
Mon, 29 Dec 2025 18:32:21 +0000 (19:32 +0100)
committer GitHub <redacted>
Mon, 29 Dec 2025 18:32:21 +0000 (19:32 +0100)
* webui: display prompt preprocessing progress

* webui: add percentage/ETA and exclude cached tokens from progress

Address review feedback from ngxson

* webui: add minutes and first chunk (0%) case

* Update tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte

Co-authored-by: Aleksander Grygier <redacted>
* Update tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte

Co-authored-by: Aleksander Grygier <redacted>
* webui: address review feedback from allozaur

* chore: update webui build output

* webui: address review feedback from allozaur

* nit

* chore: update webui build output

* feat: Enhance chat processing state

* feat: Improve chat processing statistics UI

* chore: update webui build output

* feat: Add live generation statistics to processing state hook

* feat: Persist prompt processing stats in hook for better UX

* refactor: Enhance ChatMessageStatistics for live stream display

* feat: Implement enhanced live chat statistics into assistant message

* chore: update webui build output

* fix: Proper tab for each stage of prompt processing/generation

* chore: update webui build output

* fix: Improved ETA calculation & display logic

* chore: update webui build output

* feat: Simplify logic & remove ETA from prompt progress

* chore: update webui build output

---------

Co-authored-by: Aleksander Grygier <redacted>
tools/server/public/index.html.gz
tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte
tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageStatistics.svelte
tools/server/webui/src/lib/hooks/use-processing-state.svelte.ts
tools/server/webui/src/lib/services/chat.ts
tools/server/webui/src/lib/stores/chat.svelte.ts
tools/server/webui/src/lib/types/api.d.ts
tools/server/webui/src/lib/types/settings.d.ts

index cf5c625b40eb813355a71c2feeec222bf7e2614f..fad15e38e9a396e834c28beaec187c0d58bbf3f2 100644 (file)
Binary files a/tools/server/public/index.html.gz and b/tools/server/public/index.html.gz differ
index 8997963f1622aa7914805537e5b408fe1a7a678a..c1ef4dfd0f55c4374d563bc3ea06479ea95491cc 100644 (file)
@@ -89,6 +89,7 @@
        const fallbackToolCalls = $derived(typeof toolCallContent === 'string' ? toolCallContent : null);
 
        const processingState = useProcessingState();
+
        let currentConfig = $derived(config());
        let isRouter = $derived(isRouterMode());
        let displayedModel = $derived((): string | null => {
                }
        });
 
+       $effect(() => {
+               if (isLoading() && !message?.content?.trim()) {
+                       processingState.startMonitoring();
+               }
+       });
+
        function formatToolCallBadge(toolCall: ApiChatCompletionToolCall, index: number) {
                const callNumber = index + 1;
                const functionName = toolCall.function?.name?.trim();
                <div class="mt-6 w-full max-w-[48rem]" in:fade>
                        <div class="processing-container">
                                <span class="processing-text">
-                                       {processingState.getProcessingMessage()}
+                                       {processingState.getPromptProgressText() ?? processingState.getProcessingMessage()}
                                </span>
                        </div>
                </div>
                                                predictedTokens={message.timings.predicted_n}
                                                predictedMs={message.timings.predicted_ms}
                                        />
+                               {:else if isLoading() && currentConfig.showMessageStats}
+                                       {@const liveStats = processingState.getLiveProcessingStats()}
+                                       {@const genStats = processingState.getLiveGenerationStats()}
+                                       {@const promptProgress = processingState.processingState?.promptProgress}
+                                       {@const isStillProcessingPrompt =
+                                               promptProgress && promptProgress.processed < promptProgress.total}
+
+                                       {#if liveStats || genStats}
+                                               <ChatMessageStatistics
+                                                       isLive={true}
+                                                       isProcessingPrompt={!!isStillProcessingPrompt}
+                                                       promptTokens={liveStats?.tokensProcessed}
+                                                       promptMs={liveStats?.timeMs}
+                                                       predictedTokens={genStats?.tokensGenerated}
+                                                       predictedMs={genStats?.timeMs}
+                                               />
+                                       {/if}
                                {/if}
                        </div>
                {/if}
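
The template change above prefers live statistics while streaming: the READING view is shown while promptProgress still reports processed < total, and the generation view takes over once tokens arrive. A minimal TypeScript sketch of that decision, with the PromptProgress shape taken from the types in this diff (the helper itself is illustrative, not webui code):

// Illustrative only: mirrors the `isStillProcessingPrompt` check in the
// template above. `processed` counts prompt tokens decoded so far and
// `total` the full prompt length, both including cached tokens.
interface PromptProgress {
	processed: number;
	total: number;
}

function isStillProcessingPrompt(p: PromptProgress | undefined | null): boolean {
	return !!p && p.processed < p.total;
}

isStillProcessingPrompt({ processed: 512, total: 2048 });  // true: keep showing READING stats
isStillProcessingPrompt({ processed: 2048, total: 2048 }); // false: switch to generation stats
isStillProcessingPrompt(undefined);                        // false: no progress chunk received yet
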
index a39acb1d758689a02f0d97fec89a46cb079ba124..24fe5926bad1b72e285757c69493baf1b8c0ead5 100644 (file)
@@ -5,21 +5,64 @@
        import { ChatMessageStatsView } from '$lib/enums';
 
        interface Props {
-               predictedTokens: number;
-               predictedMs: number;
+               predictedTokens?: number;
+               predictedMs?: number;
                promptTokens?: number;
                promptMs?: number;
+               // Live mode: when true, shows stats during streaming
+               isLive?: boolean;
+               // Whether prompt processing is still in progress
+               isProcessingPrompt?: boolean;
+               // Initial view to show (defaults to READING in live mode)
+               initialView?: ChatMessageStatsView;
        }
 
-       let { predictedTokens, predictedMs, promptTokens, promptMs }: Props = $props();
+       let {
+               predictedTokens,
+               predictedMs,
+               promptTokens,
+               promptMs,
+               isLive = false,
+               isProcessingPrompt = false,
+               initialView = ChatMessageStatsView.GENERATION
+       }: Props = $props();
 
-       let activeView: ChatMessageStatsView = $state(ChatMessageStatsView.GENERATION);
+       let activeView: ChatMessageStatsView = $state(initialView);
+       let hasAutoSwitchedToGeneration = $state(false);
 
-       let tokensPerSecond = $derived((predictedTokens / predictedMs) * 1000);
-       let timeInSeconds = $derived((predictedMs / 1000).toFixed(2));
+       // In live mode: auto-switch to GENERATION tab when prompt processing completes
+       $effect(() => {
+               if (isLive) {
+                       // Auto-switch to generation tab only when prompt processing is done (once)
+                       if (
+                               !hasAutoSwitchedToGeneration &&
+                               !isProcessingPrompt &&
+                               predictedTokens &&
+                               predictedTokens > 0
+                       ) {
+                               activeView = ChatMessageStatsView.GENERATION;
+                               hasAutoSwitchedToGeneration = true;
+                       } else if (!hasAutoSwitchedToGeneration) {
+                               // Stay on READING while prompt is still being processed
+                               activeView = ChatMessageStatsView.READING;
+                       }
+               }
+       });
+
+       let hasGenerationStats = $derived(
+               predictedTokens !== undefined &&
+                       predictedTokens > 0 &&
+                       predictedMs !== undefined &&
+                       predictedMs > 0
+       );
+
+       let tokensPerSecond = $derived(hasGenerationStats ? (predictedTokens! / predictedMs!) * 1000 : 0);
+       let timeInSeconds = $derived(
+               predictedMs !== undefined ? (predictedMs / 1000).toFixed(2) : '0.00'
+       );
 
        let promptTokensPerSecond = $derived(
-               promptTokens !== undefined && promptMs !== undefined
+               promptTokens !== undefined && promptMs !== undefined && promptMs > 0
                        ? (promptTokens / promptMs) * 1000
                        : undefined
        );
                        promptTokensPerSecond !== undefined &&
                        promptTimeInSeconds !== undefined
        );
+
+       // In live mode, generation tab is disabled until we have generation stats
+       let isGenerationDisabled = $derived(isLive && !hasGenerationStats);
 </script>
 
 <div class="inline-flex items-center text-xs text-muted-foreground">
        <div class="inline-flex items-center rounded-sm bg-muted-foreground/15 p-0.5">
-               {#if hasPromptStats}
+               {#if hasPromptStats || isLive}
                        <Tooltip.Root>
                                <Tooltip.Trigger>
                                        <button
                                        class="inline-flex h-5 w-5 items-center justify-center rounded-sm transition-colors {activeView ===
                                        ChatMessageStatsView.GENERATION
                                                ? 'bg-background text-foreground shadow-sm'
-                                               : 'hover:text-foreground'}"
-                                       onclick={() => (activeView = ChatMessageStatsView.GENERATION)}
+                                               : isGenerationDisabled
+                                                       ? 'cursor-not-allowed opacity-40'
+                                                       : 'hover:text-foreground'}"
+                                       onclick={() => !isGenerationDisabled && (activeView = ChatMessageStatsView.GENERATION)}
+                                       disabled={isGenerationDisabled}
                                >
                                        <Sparkles class="h-3 w-3" />
                                        <span class="sr-only">Generation</span>
                                </button>
                        </Tooltip.Trigger>
                        <Tooltip.Content>
-                               <p>Generation (token output)</p>
+                               <p>
+                                       {isGenerationDisabled
+                                               ? 'Generation (waiting for tokens...)'
+                                               : 'Generation (token output)'}
+                               </p>
                        </Tooltip.Content>
                </Tooltip.Root>
        </div>
 
        <div class="flex items-center gap-1 px-2">
-               {#if activeView === ChatMessageStatsView.GENERATION}
+               {#if activeView === ChatMessageStatsView.GENERATION && hasGenerationStats}
                        <BadgeChatStatistic
                                class="bg-transparent"
                                icon={WholeWord}
-                               value="{predictedTokens} tokens"
+                               value="{predictedTokens?.toLocaleString()} tokens"
                                tooltipLabel="Generated tokens"
                        />
                        <BadgeChatStatistic
index a861f23b480f04f2cf0df3e6c2c2904d4e270ccf..4b24cfc691480787db2ac7c0673bd13f42045147 100644 (file)
@@ -1,10 +1,26 @@
 import { activeProcessingState } from '$lib/stores/chat.svelte';
 import { config } from '$lib/stores/settings.svelte';
 
+export interface LiveProcessingStats {
+       tokensProcessed: number;
+       totalTokens: number;
+       timeMs: number;
+       tokensPerSecond: number;
+}
+
+export interface LiveGenerationStats {
+       tokensGenerated: number;
+       timeMs: number;
+       tokensPerSecond: number;
+}
+
 export interface UseProcessingStateReturn {
        readonly processingState: ApiProcessingState | null;
        getProcessingDetails(): string[];
        getProcessingMessage(): string;
+       getPromptProgressText(): string | null;
+       getLiveProcessingStats(): LiveProcessingStats | null;
+       getLiveGenerationStats(): LiveGenerationStats | null;
        shouldShowDetails(): boolean;
        startMonitoring(): void;
        stopMonitoring(): void;
@@ -29,6 +45,7 @@ export interface UseProcessingStateReturn {
 export function useProcessingState(): UseProcessingStateReturn {
        let isMonitoring = $state(false);
        let lastKnownState = $state<ApiProcessingState | null>(null);
+       let lastKnownProcessingStats = $state<LiveProcessingStats | null>(null);
 
        // Derive processing state reactively from chatStore's direct state
        const processingState = $derived.by(() => {
@@ -46,6 +63,25 @@ export function useProcessingState(): UseProcessingStateReturn {
                }
        });
 
+       // Track last known processing stats for when promptProgress disappears
+       $effect(() => {
+               if (processingState?.promptProgress) {
+                       const { processed, total, time_ms, cache } = processingState.promptProgress;
+                       const actualProcessed = processed - cache;
+                       const actualTotal = total - cache;
+
+                       if (actualProcessed > 0 && time_ms > 0) {
+                               const tokensPerSecond = actualProcessed / (time_ms / 1000);
+                               lastKnownProcessingStats = {
+                                       tokensProcessed: actualProcessed,
+                                       totalTokens: actualTotal,
+                                       timeMs: time_ms,
+                                       tokensPerSecond
+                               };
+                       }
+               }
+       });
+
        function startMonitoring(): void {
                if (isMonitoring) return;
                isMonitoring = true;
@@ -59,28 +95,25 @@ export function useProcessingState(): UseProcessingStateReturn {
                const currentConfig = config();
                if (!currentConfig.keepStatsVisible) {
                        lastKnownState = null;
+                       lastKnownProcessingStats = null;
                }
        }
 
        function getProcessingMessage(): string {
-               const state = processingState;
-               if (!state) {
+               if (!processingState) {
                        return 'Processing...';
                }
 
-               switch (state.status) {
+               switch (processingState.status) {
                        case 'initializing':
                                return 'Initializing...';
                        case 'preparing':
-                               if (state.progressPercent !== undefined) {
-                                       return `Processing (${state.progressPercent}%)`;
+                               if (processingState.progressPercent !== undefined) {
+                                       return `Processing (${processingState.progressPercent}%)`;
                                }
                                return 'Preparing response...';
                        case 'generating':
-                               if (state.tokensDecoded > 0) {
-                                       return `Generating... (${state.tokensDecoded} tokens)`;
-                               }
-                               return 'Generating...';
+                               return '';
                        default:
                                return 'Processing...';
                }
@@ -131,8 +164,70 @@ export function useProcessingState(): UseProcessingStateReturn {
        }
 
        function shouldShowDetails(): boolean {
-               const state = processingState;
-               return state !== null && state.status !== 'idle';
+               return processingState !== null && processingState.status !== 'idle';
+       }
+
+       /**
+        * Returns a short progress message with percent
+        */
+       function getPromptProgressText(): string | null {
+               if (!processingState?.promptProgress) return null;
+
+               const { processed, total, cache } = processingState.promptProgress;
+
+               const actualProcessed = processed - cache;
+               const actualTotal = total - cache;
+               const percent = Math.round((actualProcessed / actualTotal) * 100);
+
+               return `Processing ${percent}%`;
+       }
+
+       /**
+        * Returns live processing statistics for display (prompt processing phase)
+        * Returns last known stats when promptProgress becomes unavailable
+        */
+       function getLiveProcessingStats(): LiveProcessingStats | null {
+               if (processingState?.promptProgress) {
+                       const { processed, total, time_ms, cache } = processingState.promptProgress;
+
+                       const actualProcessed = processed - cache;
+                       const actualTotal = total - cache;
+
+                       if (actualProcessed > 0 && time_ms > 0) {
+                               const tokensPerSecond = actualProcessed / (time_ms / 1000);
+
+                               return {
+                                       tokensProcessed: actualProcessed,
+                                       totalTokens: actualTotal,
+                                       timeMs: time_ms,
+                                       tokensPerSecond
+                               };
+                       }
+               }
+
+               // Return last known stats if promptProgress is no longer available
+               return lastKnownProcessingStats;
+       }
+
+       /**
+        * Returns live generation statistics for display (token generation phase)
+        */
+       function getLiveGenerationStats(): LiveGenerationStats | null {
+               if (!processingState) return null;
+
+               const { tokensDecoded, tokensPerSecond } = processingState;
+
+               if (tokensDecoded <= 0) return null;
+
+               // Calculate time from tokens and speed
+               const timeMs =
+                       tokensPerSecond && tokensPerSecond > 0 ? (tokensDecoded / tokensPerSecond) * 1000 : 0;
+
+               return {
+                       tokensGenerated: tokensDecoded,
+                       timeMs,
+                       tokensPerSecond: tokensPerSecond || 0
+               };
        }
 
        return {
@@ -141,6 +236,9 @@ export function useProcessingState(): UseProcessingStateReturn {
                },
                getProcessingDetails,
                getProcessingMessage,
+               getPromptProgressText,
+               getLiveProcessingStats,
+               getLiveGenerationStats,
                shouldShowDetails,
                startMonitoring,
                stopMonitoring
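
Two computations in this hook are worth spelling out. Cached tokens are subtracted from both sides of the progress fraction, so reusing a warm KV cache does not inflate the percentage; and since the stream reports decoded tokens plus speed rather than elapsed time, generation time is reconstructed from those two. A sketch under the field names from this diff (the guard on actualTotal is added here for illustration; the hook assumes a non-empty prompt):

// Cache-exclusion math from getPromptProgressText()/getLiveProcessingStats().
function promptPercent(processed: number, total: number, cache: number): number {
	const actualProcessed = processed - cache; // tokens actually decoded this turn
	const actualTotal = total - cache;         // tokens that needed decoding
	return actualTotal > 0 ? Math.round((actualProcessed / actualTotal) * 100) : 100;
}

// 6000 of 10000 prompt tokens done, 2000 of them served from cache:
// (6000 - 2000) / (10000 - 2000) = 50%, not the naive 60%.
promptPercent(6000, 10000, 2000); // 50

// Generation-time estimate from getLiveGenerationStats().
function generationTimeMs(tokensDecoded: number, tokensPerSecond?: number): number {
	return tokensPerSecond && tokensPerSecond > 0
		? (tokensDecoded / tokensPerSecond) * 1000
		: 0;
}

generationTimeMs(120, 40); // 3000 ms
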
index c03b764419f5cacc762efa1903db808538c65442..86648f3cba007567956a1d4aea5701247bf77ca6 100644 (file)
@@ -117,7 +117,8 @@ export class ChatService {
                                role: msg.role,
                                content: msg.content
                        })),
-                       stream
+                       stream,
+                       return_progress: stream ? true : undefined
                };
 
                // Include model in request if provided (required in ROUTER mode)
@@ -271,7 +272,7 @@ export class ChatService {
                onReasoningChunk?: (chunk: string) => void,
                onToolCallChunk?: (chunk: string) => void,
                onModel?: (model: string) => void,
-               onTimings?: (timings: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void,
+               onTimings?: (timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void,
                conversationId?: string,
                abortSignal?: AbortSignal
        ): Promise<void> {
@@ -366,11 +367,13 @@ export class ChatService {
                                                                onModel?.(chunkModel);
                                                        }
 
-                                                       if (timings || promptProgress) {
+                                                       if (promptProgress) {
+                                                               ChatService.notifyTimings(undefined, promptProgress, onTimings);
+                                                       }
+
+                                                       if (timings) {
                                                                ChatService.notifyTimings(timings, promptProgress, onTimings);
-                                                               if (timings) {
-                                                                       lastTimings = timings;
-                                                               }
+                                                               lastTimings = timings;
                                                        }
 
                                                        if (content) {
@@ -768,10 +771,11 @@ export class ChatService {
                timings: ChatMessageTimings | undefined,
                promptProgress: ChatMessagePromptProgress | undefined,
                onTimingsCallback:
-                       | ((timings: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void)
+                       | ((timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void)
                        | undefined
        ): void {
-               if (!timings || !onTimingsCallback) return;
+               if (!onTimingsCallback || (!timings && !promptProgress)) return;
+
                onTimingsCallback(timings, promptProgress);
        }
 }
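
With return_progress set on streamed requests, the server can interleave prompt_progress payloads before any timings exist, which is why notifyTimings was relaxed to accept either one. A hedged sketch of the resulting dispatch, using the chunk field names from this diff (the helper and its surrounding parsing loop are assumed):

// Illustrative chunk dispatch matching the change above.
interface Timings { predicted_n: number; predicted_ms: number }
interface PromptProgress { processed: number; total: number; cache: number; time_ms: number }

type OnTimings = (timings?: Timings, promptProgress?: PromptProgress) => void;

function dispatchChunk(
	timings: Timings | undefined,
	promptProgress: PromptProgress | undefined,
	onTimings?: OnTimings
): void {
	if (!onTimings) return;
	// Progress-only chunks are now forwarded on their own, so the UI can
	// update the percentage before the first timings payload arrives.
	if (promptProgress) onTimings(undefined, promptProgress);
	if (timings) onTimings(timings, promptProgress);
}
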
index 010889452496feb40d1960d492ee7ac9c3a0aee4..86d034e8bed3d7480a507f58df3832cf5fe7bd78 100644 (file)
@@ -324,6 +324,7 @@ class ChatStore {
                        topP: currentConfig.top_p ?? 0.95,
                        speculative: false,
                        progressPercent,
+                       promptProgress,
                        promptTokens,
                        promptMs,
                        cacheTokens
@@ -534,7 +535,7 @@ class ChatStore {
                                        conversationsStore.updateMessageAtIndex(idx, { toolCalls: streamedToolCallContent });
                                },
                                onModel: (modelName: string) => recordModel(modelName),
-                               onTimings: (timings: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => {
+                               onTimings: (timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => {
                                        const tokensPerSecond =
                                                timings?.predicted_ms && timings?.predicted_n
                                                        ? (timings.predicted_n / timings.predicted_ms) * 1000
@@ -1032,7 +1033,7 @@ class ChatStore {
                                                });
                                        },
 
-                                       onTimings: (timings: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => {
+                                       onTimings: (timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => {
                                                const tokensPerSecond =
                                                        timings?.predicted_ms && timings?.predicted_n
                                                                ? (timings.predicted_n / timings.predicted_ms) * 1000
index e5fde24c75d3d4ce7ecf583bbb847a528f5488d2..c2ecc02820a1424ef0858e0e7a9e5c6db181c50f 100644 (file)
@@ -186,6 +186,7 @@ export interface ApiChatCompletionRequest {
        }>;
        stream?: boolean;
        model?: string;
+       return_progress?: boolean;
        // Reasoning parameters
        reasoning_format?: string;
        // Generation parameters
@@ -341,6 +342,7 @@ export interface ApiProcessingState {
        tokensPerSecond?: number;
        // Progress information from prompt_progress
        progressPercent?: number;
+       promptProgress?: ChatMessagePromptProgress;
        promptTokens?: number;
        promptMs?: number;
        cacheTokens?: number;
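
The new return_progress flag is only sent for streamed requests (`stream ? true : undefined`, so the key is omitted from the JSON otherwise). A minimal request sketch; the endpoint path and fetch call here are assumptions, not part of this diff:

// Hypothetical caller; only the `return_progress` field comes from this diff.
const body = {
	messages: [{ role: 'user', content: 'Summarize this document.' }],
	stream: true,
	return_progress: true // omit (undefined) for non-streamed requests
};

const response = await fetch('/v1/chat/completions', {
	method: 'POST',
	headers: { 'Content-Type': 'application/json' },
	body: JSON.stringify(body)
});
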
index 40de98b7084161e0e3ae77eb3fb13c524efdee84..e09f0f332cc121ee45cfa996129343626e0fac09 100644 (file)
@@ -51,7 +51,7 @@ export interface SettingsChatServiceOptions {
        onReasoningChunk?: (chunk: string) => void;
        onToolCallChunk?: (chunk: string) => void;
        onModel?: (model: string) => void;
-       onTimings?: (timings: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void;
+       onTimings?: (timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void;
        onComplete?: (
                response: string,
                reasoningContent?: string,
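
Since timings is now optional in the onTimings signature, consumers have to branch on both parameters instead of assuming timings is present. A minimal handler sketch (the logging body is hypothetical):

// Hypothetical consumer of the widened callback signature.
const onTimings = (
	timings?: { predicted_n: number; predicted_ms: number },
	promptProgress?: { processed: number; total: number; cache: number; time_ms: number }
): void => {
	if (promptProgress) {
		console.log(`prompt: ${promptProgress.processed}/${promptProgress.total} tokens`);
	}
	if (timings?.predicted_ms && timings.predicted_n) {
		console.log(`${(timings.predicted_n / timings.predicted_ms) * 1000} tok/s`);
	}
};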