git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
webui: fix prompt progress ETA calculation (#18468)
authorXuan-Son Nguyen <redacted>
Mon, 29 Dec 2025 20:42:11 +0000 (21:42 +0100)
committerGitHub <redacted>
Mon, 29 Dec 2025 20:42:11 +0000 (21:42 +0100)
* webui: fix prompt progress ETA calculation

* handle case done === 0

tools/server/public/index.html.gz
tools/server/webui/src/lib/hooks/use-processing-state.svelte.ts
tools/server/webui/src/lib/stores/chat.svelte.ts

index fad15e38e9a396e834c28beaec187c0d58bbf3f2..d1c10eed91297c061f4d5dbd18dec4e620ae5bf1 100644 (file)
Binary files a/tools/server/public/index.html.gz and b/tools/server/public/index.html.gz differ
index 4b24cfc691480787db2ac7c0673bd13f42045147..c06cf28864ae453597712d5e066fd8deb8af076d 100644 (file)
@@ -6,6 +6,7 @@ export interface LiveProcessingStats {
        totalTokens: number;
        timeMs: number;
        tokensPerSecond: number;
+       etaSecs?: number;
 }
 
 export interface LiveGenerationStats {
@@ -82,6 +83,15 @@ export function useProcessingState(): UseProcessingStateReturn {
                }
        });
 
+       function getETASecs(done: number, total: number, elapsedMs: number): number | undefined {
+               const elapsedSecs = elapsedMs / 1000;
+               const progressETASecs =
+                       done === 0 || elapsedSecs < 0.5
+                               ? undefined // can be the case for the 0% progress report
+                               : elapsedSecs * (total / done - 1);
+               return progressETASecs;
+       }
+
        function startMonitoring(): void {
                if (isMonitoring) return;
                isMonitoring = true;
@@ -178,6 +188,12 @@ export function useProcessingState(): UseProcessingStateReturn {
                const actualProcessed = processed - cache;
                const actualTotal = total - cache;
                const percent = Math.round((actualProcessed / actualTotal) * 100);
+               const eta = getETASecs(actualProcessed, actualTotal, processingState.promptProgress.time_ms);
+
+               if (eta !== undefined) {
+                       const etaSecs = Math.ceil(eta);
+                       return `Processing ${percent}% (ETA: ${etaSecs}s)`;
+               }
 
                return `Processing ${percent}%`;
        }
index 86d034e8bed3d7480a507f58df3832cf5fe7bd78..67157e36ac0abd2a479c8a957208d74e6e3a7fd5 100644 (file)
@@ -303,11 +303,17 @@ class ChatStore {
                const currentConfig = config();
                const outputTokensMax = currentConfig.max_tokens || -1;
 
+               // Note: for timings data, the n_prompt does NOT include cache tokens
                const contextUsed = promptTokens + cacheTokens + predictedTokens;
                const outputTokensUsed = predictedTokens;
 
+               // Note: for prompt progress, the "processed" DOES include cache tokens
+               // we need to exclude them to get the real prompt tokens processed count
+               const progressCache = promptProgress?.cache || 0;
+               const progressActualDone = (promptProgress?.processed ?? 0) - progressCache;
+               const progressActualTotal = (promptProgress?.total ?? 0) - progressCache;
                const progressPercent = promptProgress
-                       ? Math.round((promptProgress.processed / promptProgress.total) * 100)
+                       ? Math.round((progressActualDone / progressActualTotal) * 100)
                        : undefined;
 
                return {