const fallbackToolCalls = $derived(typeof toolCallContent === 'string' ? toolCallContent : null);
const processingState = useProcessingState();
+
let currentConfig = $derived(config());
let isRouter = $derived(isRouterMode());
let displayedModel = $derived((): string | null => {
}
});
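+ // Start progress monitoring once a reply is pending but nothing has streamed yet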
+ $effect(() => {
+ if (isLoading() && !message?.content?.trim()) {
+ processingState.startMonitoring();
+ }
+ });
+
function formatToolCallBadge(toolCall: ApiChatCompletionToolCall, index: number) {
const callNumber = index + 1;
const functionName = toolCall.function?.name?.trim();
<div class="mt-6 w-full max-w-[48rem]" in:fade>
<div class="processing-container">
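+ <!-- Prefer the compact prompt-progress text; fall back to the generic status message -->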
<span class="processing-text">
- {processingState.getProcessingMessage()}
+ {processingState.getPromptProgressText() ?? processingState.getProcessingMessage()}
</span>
</div>
</div>
predictedTokens={message.timings.predicted_n}
predictedMs={message.timings.predicted_ms}
/>
+ {:else if isLoading() && currentConfig.showMessageStats}
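+ <!-- While streaming, show live stats derived from the processing state instead of final timings -->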
+ {@const liveStats = processingState.getLiveProcessingStats()}
+ {@const genStats = processingState.getLiveGenerationStats()}
+ {@const promptProgress = processingState.processingState?.promptProgress}
+ {@const isStillProcessingPrompt =
+ promptProgress && promptProgress.processed < promptProgress.total}
+
+ {#if liveStats || genStats}
+ <ChatMessageStatistics
+ isLive={true}
+ isProcessingPrompt={!!isStillProcessingPrompt}
+ promptTokens={liveStats?.tokensProcessed}
+ promptMs={liveStats?.timeMs}
+ predictedTokens={genStats?.tokensGenerated}
+ predictedMs={genStats?.timeMs}
+ />
+ {/if}
{/if}
</div>
{/if}
import { ChatMessageStatsView } from '$lib/enums';
interface Props {
- predictedTokens: number;
- predictedMs: number;
+ predictedTokens?: number;
+ predictedMs?: number;
promptTokens?: number;
promptMs?: number;
+ // Live mode: when true, shows stats during streaming
+ isLive?: boolean;
+ // Whether prompt processing is still in progress
+ isProcessingPrompt?: boolean;
+ // Initial view to show (live mode starts on READING until generation begins)
+ initialView?: ChatMessageStatsView;
}
- let { predictedTokens, predictedMs, promptTokens, promptMs }: Props = $props();
+ let {
+ predictedTokens,
+ predictedMs,
+ promptTokens,
+ promptMs,
+ isLive = false,
+ isProcessingPrompt = false,
+ initialView = ChatMessageStatsView.GENERATION
+ }: Props = $props();
- let activeView: ChatMessageStatsView = $state(ChatMessageStatsView.GENERATION);
+ let activeView: ChatMessageStatsView = $state(initialView);
+ let hasAutoSwitchedToGeneration = $state(false);
- let tokensPerSecond = $derived((predictedTokens / predictedMs) * 1000);
- let timeInSeconds = $derived((predictedMs / 1000).toFixed(2));
+ // In live mode: auto-switch to GENERATION tab when prompt processing completes
+ $effect(() => {
+ if (isLive) {
+ // Auto-switch to generation tab only when prompt processing is done (once)
+ if (
+ !hasAutoSwitchedToGeneration &&
+ !isProcessingPrompt &&
+ predictedTokens &&
+ predictedTokens > 0
+ ) {
+ activeView = ChatMessageStatsView.GENERATION;
+ hasAutoSwitchedToGeneration = true;
+ } else if (!hasAutoSwitchedToGeneration) {
+ // Stay on READING while prompt is still being processed
+ activeView = ChatMessageStatsView.READING;
+ }
+ }
+ });
+
+ let hasGenerationStats = $derived(
+ predictedTokens !== undefined &&
+ predictedTokens > 0 &&
+ predictedMs !== undefined &&
+ predictedMs > 0
+ );
+
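+ // Timings are in milliseconds, so tokens/ms * 1000 yields tokens per second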
+ let tokensPerSecond = $derived(hasGenerationStats ? (predictedTokens! / predictedMs!) * 1000 : 0);
+ let timeInSeconds = $derived(
+ predictedMs !== undefined ? (predictedMs / 1000).toFixed(2) : '0.00'
+ );
let promptTokensPerSecond = $derived(
- promptTokens !== undefined && promptMs !== undefined
+ promptTokens !== undefined && promptMs !== undefined && promptMs > 0
? (promptTokens / promptMs) * 1000
: undefined
);
promptTokensPerSecond !== undefined &&
promptTimeInSeconds !== undefined
);
+
+ // In live mode, generation tab is disabled until we have generation stats
+ let isGenerationDisabled = $derived(isLive && !hasGenerationStats);
</script>
<div class="inline-flex items-center text-xs text-muted-foreground">
<div class="inline-flex items-center rounded-sm bg-muted-foreground/15 p-0.5">
- {#if hasPromptStats}
+ {#if hasPromptStats || isLive}
<Tooltip.Root>
<Tooltip.Trigger>
<button
class="inline-flex h-5 w-5 items-center justify-center rounded-sm transition-colors {activeView ===
ChatMessageStatsView.GENERATION
? 'bg-background text-foreground shadow-sm'
- : 'hover:text-foreground'}"
- onclick={() => (activeView = ChatMessageStatsView.GENERATION)}
+ : isGenerationDisabled
+ ? 'cursor-not-allowed opacity-40'
+ : 'hover:text-foreground'}"
+ onclick={() => !isGenerationDisabled && (activeView = ChatMessageStatsView.GENERATION)}
+ disabled={isGenerationDisabled}
>
<Sparkles class="h-3 w-3" />
<span class="sr-only">Generation</span>
</button>
</Tooltip.Trigger>
<Tooltip.Content>
- <p>Generation (token output)</p>
+ <p>
+ {isGenerationDisabled
+ ? 'Generation (waiting for tokens...)'
+ : 'Generation (token output)'}
+ </p>
</Tooltip.Content>
</Tooltip.Root>
</div>
<div class="flex items-center gap-1 px-2">
- {#if activeView === ChatMessageStatsView.GENERATION}
+ {#if activeView === ChatMessageStatsView.GENERATION && hasGenerationStats}
<BadgeChatStatistic
class="bg-transparent"
icon={WholeWord}
- value="{predictedTokens} tokens"
+ value="{predictedTokens?.toLocaleString()} tokens"
tooltipLabel="Generated tokens"
/>
<BadgeChatStatistic
import { activeProcessingState } from '$lib/stores/chat.svelte';
import { config } from '$lib/stores/settings.svelte';
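+/** Live prompt-processing throughput, derived from streamed prompt_progress chunks */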
+export interface LiveProcessingStats {
+ tokensProcessed: number;
+ totalTokens: number;
+ timeMs: number;
+ tokensPerSecond: number;
+}
+
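+/** Live token-generation throughput, derived from the streaming processing state */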
+export interface LiveGenerationStats {
+ tokensGenerated: number;
+ timeMs: number;
+ tokensPerSecond: number;
+}
+
export interface UseProcessingStateReturn {
readonly processingState: ApiProcessingState | null;
getProcessingDetails(): string[];
getProcessingMessage(): string;
+ getPromptProgressText(): string | null;
+ getLiveProcessingStats(): LiveProcessingStats | null;
+ getLiveGenerationStats(): LiveGenerationStats | null;
shouldShowDetails(): boolean;
startMonitoring(): void;
stopMonitoring(): void;
export function useProcessingState(): UseProcessingStateReturn {
let isMonitoring = $state(false);
let lastKnownState = $state<ApiProcessingState | null>(null);
+ let lastKnownProcessingStats = $state<LiveProcessingStats | null>(null);
// Derive processing state reactively from chatStore's direct state
const processingState = $derived.by(() => {
}
});
+ // Track last known processing stats for when promptProgress disappears
+ $effect(() => {
+ if (processingState?.promptProgress) {
+ const { processed, total, time_ms, cache } = processingState.promptProgress;
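+ // "cache" counts tokens reused from the KV cache; exclude them so the rate reflects fresh evaluation only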
+ const actualProcessed = processed - cache;
+ const actualTotal = total - cache;
+
+ if (actualProcessed > 0 && time_ms > 0) {
+ const tokensPerSecond = actualProcessed / (time_ms / 1000);
+ lastKnownProcessingStats = {
+ tokensProcessed: actualProcessed,
+ totalTokens: actualTotal,
+ timeMs: time_ms,
+ tokensPerSecond
+ };
+ }
+ }
+ });
+
function startMonitoring(): void {
if (isMonitoring) return;
isMonitoring = true;
const currentConfig = config();
if (!currentConfig.keepStatsVisible) {
lastKnownState = null;
+ lastKnownProcessingStats = null;
}
}
function getProcessingMessage(): string {
- const state = processingState;
- if (!state) {
+ if (!processingState) {
return 'Processing...';
}
- switch (state.status) {
+ switch (processingState.status) {
case 'initializing':
return 'Initializing...';
case 'preparing':
- if (state.progressPercent !== undefined) {
- return `Processing (${state.progressPercent}%)`;
+ if (processingState.progressPercent !== undefined) {
+ return `Processing (${processingState.progressPercent}%)`;
}
return 'Preparing response...';
case 'generating':
- if (state.tokensDecoded > 0) {
- return `Generating... (${state.tokensDecoded} tokens)`;
- }
- return 'Generating...';
+ return '';
default:
return 'Processing...';
}
}
function shouldShowDetails(): boolean {
- const state = processingState;
- return state !== null && state.status !== 'idle';
+ return processingState !== null && processingState.status !== 'idle';
+ }
+
+ /**
+ * Returns a short "Processing N%" message, or null when no prompt progress is available
+ */
+ function getPromptProgressText(): string | null {
+ if (!processingState?.promptProgress) return null;
+
+ const { processed, total, cache } = processingState.promptProgress;
+
+ const actualProcessed = processed - cache;
+ const actualTotal = total - cache;
+
+ // Guard against a fully cached prompt, which would make actualTotal zero and percent NaN
+ if (actualTotal <= 0) return null;
+
+ const percent = Math.round((actualProcessed / actualTotal) * 100);
+
+ return `Processing ${percent}%`;
+ }
+
+ /**
+ * Returns live processing statistics for display (prompt processing phase)
+ * Returns last known stats when promptProgress becomes unavailable
+ */
+ function getLiveProcessingStats(): LiveProcessingStats | null {
+ if (processingState?.promptProgress) {
+ const { processed, total, time_ms, cache } = processingState.promptProgress;
+
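+ // Same KV-cache adjustment as in the tracking effect above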
+ const actualProcessed = processed - cache;
+ const actualTotal = total - cache;
+
+ if (actualProcessed > 0 && time_ms > 0) {
+ const tokensPerSecond = actualProcessed / (time_ms / 1000);
+
+ return {
+ tokensProcessed: actualProcessed,
+ totalTokens: actualTotal,
+ timeMs: time_ms,
+ tokensPerSecond
+ };
+ }
+ }
+
+ // Return last known stats if promptProgress is no longer available
+ return lastKnownProcessingStats;
+ }
+
+ /**
+ * Returns live generation statistics for display (token generation phase)
+ */
+ function getLiveGenerationStats(): LiveGenerationStats | null {
+ if (!processingState) return null;
+
+ const { tokensDecoded, tokensPerSecond } = processingState;
+
+ if (tokensDecoded <= 0) return null;
+
+ // Calculate time from tokens and speed
+ const timeMs =
+ tokensPerSecond && tokensPerSecond > 0 ? (tokensDecoded / tokensPerSecond) * 1000 : 0;
+
+ return {
+ tokensGenerated: tokensDecoded,
+ timeMs,
+ tokensPerSecond: tokensPerSecond || 0
+ };
}
return {
},
getProcessingDetails,
getProcessingMessage,
+ getPromptProgressText,
+ getLiveProcessingStats,
+ getLiveGenerationStats,
shouldShowDetails,
startMonitoring,
stopMonitoring
role: msg.role,
content: msg.content
})),
- stream
+ stream,
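+ // Ask the server to stream prompt_progress chunks so the UI can show prompt-processing progress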
+ return_progress: stream ? true : undefined
};
// Include model in request if provided (required in ROUTER mode)
onReasoningChunk?: (chunk: string) => void,
onToolCallChunk?: (chunk: string) => void,
onModel?: (model: string) => void,
- onTimings?: (timings: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void,
+ onTimings?: (timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void,
conversationId?: string,
abortSignal?: AbortSignal
): Promise<void> {
onModel?.(chunkModel);
}
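+ // A chunk may carry prompt progress, final timings, or both; forward each to the timings callback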
- if (timings || promptProgress) {
+ if (promptProgress && !timings) {
+ ChatService.notifyTimings(undefined, promptProgress, onTimings);
+ }
+
+ if (timings) {
ChatService.notifyTimings(timings, promptProgress, onTimings);
- if (timings) {
- lastTimings = timings;
- }
+ lastTimings = timings;
}
if (content) {
timings: ChatMessageTimings | undefined,
promptProgress: ChatMessagePromptProgress | undefined,
onTimingsCallback:
- | ((timings: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void)
+ | ((timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void)
| undefined
): void {
- if (!timings || !onTimingsCallback) return;
+ if (!onTimingsCallback || (!timings && !promptProgress)) return;
+
onTimingsCallback(timings, promptProgress);
}
}
topP: currentConfig.top_p ?? 0.95,
speculative: false,
progressPercent,
+ promptProgress,
promptTokens,
promptMs,
cacheTokens
conversationsStore.updateMessageAtIndex(idx, { toolCalls: streamedToolCallContent });
},
onModel: (modelName: string) => recordModel(modelName),
- onTimings: (timings: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => {
+ onTimings: (timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => {
const tokensPerSecond =
timings?.predicted_ms && timings?.predicted_n
? (timings.predicted_n / timings.predicted_ms) * 1000
});
},
- onTimings: (timings: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => {
+ onTimings: (timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => {
const tokensPerSecond =
timings?.predicted_ms && timings?.predicted_n
? (timings.predicted_n / timings.predicted_ms) * 1000
}>;
stream?: boolean;
model?: string;
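+ // Request streamed prompt_progress chunks during prompt evaluation (llama-server's return_progress option)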
+ return_progress?: boolean;
// Reasoning parameters
reasoning_format?: string;
// Generation parameters
tokensPerSecond?: number;
// Progress information from prompt_progress
progressPercent?: number;
+ promptProgress?: ChatMessagePromptProgress;
promptTokens?: number;
promptMs?: number;
cacheTokens?: number;
onReasoningChunk?: (chunk: string) => void;
onToolCallChunk?: (chunk: string) => void;
onModel?: (model: string) => void;
- onTimings?: (timings: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void;
+ onTimings?: (timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void;
onComplete?: (
response: string,
reasoningContent?: string,