webui: auto-refresh /props on inference start to resync model metadata (#16784)
author    Pascal <redacted>
          Sat, 1 Nov 2025 18:49:51 +0000 (19:49 +0100)
committer GitHub <redacted>
          Sat, 1 Nov 2025 18:49:51 +0000 (19:49 +0100)
* webui: auto-refresh /props on inference start to resync model metadata

- Add no-cache headers to /props and /slots
- Throttle slot checks to 30s
- Prevent concurrent fetches with a promise guard (sketched after this list)
- Trigger refresh from chat streaming for legacy and ModelSelector
- Show dynamic serverWarning when using cached data
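
A minimal sketch of the promise-guard pattern, assuming a plain fetch() against /props
(the real store goes through ChatService.getServerProps() and also manages loading,
error, and warning state; the commit's no-cache behavior is applied as response headers
server-side, so the cache: 'no-store' request option here is only a client-side stand-in):

    // Deduplicate concurrent /props fetches by reusing the in-flight promise.
    class ServerStore {
        private fetchServerPropsPromise: Promise<void> | null = null;

        async fetchServerProps(): Promise<void> {
            if (this.fetchServerPropsPromise) {
                // A second caller awaits the same request instead of re-fetching.
                return this.fetchServerPropsPromise;
            }

            this.fetchServerPropsPromise = (async () => {
                try {
                    const res = await fetch('/props', { cache: 'no-store' });
                    if (!res.ok) throw new Error(`HTTP ${res.status}`);
                    // ...parse and store the props...
                } finally {
                    this.fetchServerPropsPromise = null; // allow the next refresh
                }
            })();

            return this.fetchServerPropsPromise;
        }
    }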

* fix: restore proper legacy behavior in webui by using unified /props refresh

Updated assistant message bubbles to show each message's stored model when available,
falling back to the current server model only when the per-message value is missing.
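
In effect (condensed from the ChatMessageAssistant.svelte hunk below):

    // The per-message model wins; the current server model is only a fallback.
    const displayedModel = message.model ?? serverModel;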

When the model selector is disabled, the webui now fetches /props and prioritizes that
model name over chunk metadata, then persists it with the streamed message so legacy
mode properly reflects the backend configuration.
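
Condensed, this is the precedence implemented by recordModel() in the chat.svelte.ts
hunk below, with chunkModelName standing in for the streamed model parameter:

    // Legacy mode (selector disabled): the /props model wins over chunk metadata.
    const preferServerPropsModel = !currentConfig.modelSelectorEnabled;

    const preferredModelSource = preferServerPropsModel
        ? (serverStore.modelName ?? chunkModelName ?? null)
        : (chunkModelName ?? serverStore.modelName ?? null);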

* fix: detect first valid SSE chunk and refresh server props once
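
The detection keys off the OpenAI-compatible object field, condensed from the chat.ts
hunk below:

    // Fire the callback once, on the first well-formed completion chunk.
    let firstValidChunkEmitted = false;

    const parsed: ApiChatCompletionStreamChunk = JSON.parse(data);

    if (!firstValidChunkEmitted && parsed.object === 'chat.completion.chunk') {
        firstValidChunkEmitted = true;

        if (!abortSignal?.aborted) {
            onFirstValidChunk?.();
        }
    }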

* fix: remove the slots availability throttle constant and state

* webui: purge ai-generated cruft

* chore: update webui static build

tools/server/public/index.html.gz
tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte
tools/server/webui/src/lib/services/chat.ts
tools/server/webui/src/lib/stores/chat.svelte.ts
tools/server/webui/src/lib/stores/server.svelte.ts
tools/server/webui/src/lib/types/api.d.ts
tools/server/webui/src/lib/types/settings.d.ts

index 501fa455a24169b1c00bb2ebe056c3719b4ce1e8..b71690cc81e29b13069e5b548016b91ba074014e 100644 (file)
Binary files a/tools/server/public/index.html.gz and b/tools/server/public/index.html.gz differ
index 41774c6f874ae3db367a88e9ad22d1b65448d2dd..d8f5630fd14f7462cc5547ded0dd230793376fa0 100644 (file)
@@ -85,8 +85,8 @@
        let displayedModel = $derived((): string | null => {
                if (!currentConfig.showModelInfo) return null;
 
-               if (currentConfig.modelSelectorEnabled) {
-                       return message.model ?? null;
+               if (message.model) {
+                       return message.model;
                }
 
                return serverModel;
index df03b10251ac2ce9d72bdb6d4d27239d815901c1..414e060764d7ece8c2ce3aad55100d09a8825c70 100644 (file)
@@ -54,6 +54,7 @@ export class ChatService {
                        onError,
                        onReasoningChunk,
                        onModel,
+                       onFirstValidChunk,
                        // Generation parameters
                        temperature,
                        max_tokens,
@@ -201,6 +202,7 @@ export class ChatService {
                                        onError,
                                        onReasoningChunk,
                                        onModel,
+                                       onFirstValidChunk,
                                        conversationId,
                                        abortController.signal
                                );
@@ -267,6 +269,7 @@ export class ChatService {
                onError?: (error: Error) => void,
                onReasoningChunk?: (chunk: string) => void,
                onModel?: (model: string) => void,
+               onFirstValidChunk?: () => void,
                conversationId?: string,
                abortSignal?: AbortSignal
        ): Promise<void> {
@@ -283,6 +286,7 @@ export class ChatService {
                let lastTimings: ChatMessageTimings | undefined;
                let streamFinished = false;
                let modelEmitted = false;
+               let firstValidChunkEmitted = false;
 
                try {
                        let chunk = '';
@@ -311,10 +315,12 @@ export class ChatService {
                                                try {
                                                        const parsed: ApiChatCompletionStreamChunk = JSON.parse(data);
 
-                                                       const chunkModel = this.extractModelName(parsed);
-                                                       if (chunkModel && !modelEmitted) {
-                                                               modelEmitted = true;
-                                                               onModel?.(chunkModel);
+                                                       if (!firstValidChunkEmitted && parsed.object === 'chat.completion.chunk') {
+                                                               firstValidChunkEmitted = true;
+
+                                                               if (!abortSignal?.aborted) {
+                                                                       onFirstValidChunk?.();
+                                                               }
                                                        }
 
                                                        const content = parsed.choices[0]?.delta?.content;
@@ -322,6 +328,12 @@ export class ChatService {
                                                        const timings = parsed.timings;
                                                        const promptProgress = parsed.prompt_progress;
 
+                                                       const chunkModel = this.extractModelName(parsed);
+                                                       if (chunkModel && !modelEmitted) {
+                                                               modelEmitted = true;
+                                                               onModel?.(chunkModel);
+                                                       }
+
                                                        if (timings || promptProgress) {
                                                                this.updateProcessingState(timings, promptProgress, conversationId);
                                                                if (timings) {
index a2e74a2e10721604cd76a5dcfa14765265e67bb2..3f97a89183d826c533bda8f0a0cccdafb1d4cc03 100644 (file)
@@ -1,6 +1,7 @@
 import { DatabaseStore } from '$lib/stores/database';
 import { chatService, slotsService } from '$lib/services';
 import { config } from '$lib/stores/settings.svelte';
+import { serverStore } from '$lib/stores/server.svelte';
 import { normalizeModelName } from '$lib/utils/model-names';
 import { filterByLeafNodeId, findLeafNode, findDescendantMessages } from '$lib/utils/branching';
 import { browser } from '$app/environment';
@@ -362,9 +363,41 @@ class ChatStore {
 
                let resolvedModel: string | null = null;
                let modelPersisted = false;
+               const currentConfig = config();
+               const preferServerPropsModel = !currentConfig.modelSelectorEnabled;
+               let serverPropsRefreshed = false;
+               let updateModelFromServerProps: ((persistImmediately?: boolean) => void) | null = null;
+
+               const refreshServerPropsOnce = () => {
+                       if (serverPropsRefreshed) {
+                               return;
+                       }
+
+                       serverPropsRefreshed = true;
+
+                       const hasExistingProps = serverStore.serverProps !== null;
 
-               const recordModel = (modelName: string, persistImmediately = true): void => {
-                       const normalizedModel = normalizeModelName(modelName);
+                       serverStore
+                               .fetchServerProps({ silent: hasExistingProps })
+                               .then(() => {
+                                       updateModelFromServerProps?.(true);
+                               })
+                               .catch((error) => {
+                                       console.warn('Failed to refresh server props after streaming started:', error);
+                               });
+               };
+
+               const recordModel = (modelName: string | null | undefined, persistImmediately = true): void => {
+                       const serverModelName = serverStore.modelName;
+                       const preferredModelSource = preferServerPropsModel
+                               ? (serverModelName ?? modelName ?? null)
+                               : (modelName ?? serverModelName ?? null);
+
+                       if (!preferredModelSource) {
+                               return;
+                       }
+
+                       const normalizedModel = normalizeModelName(preferredModelSource);
 
                        if (!normalizedModel || normalizedModel === resolvedModel) {
                                return;
@@ -388,6 +421,20 @@ class ChatStore {
                        }
                };
 
+               if (preferServerPropsModel) {
+                       updateModelFromServerProps = (persistImmediately = true) => {
+                               const currentServerModel = serverStore.modelName;
+
+                               if (!currentServerModel) {
+                                       return;
+                               }
+
+                               recordModel(currentServerModel, persistImmediately);
+                       };
+
+                       updateModelFromServerProps(false);
+               }
+
                slotsService.startStreaming();
                slotsService.setActiveConversation(assistantMessage.convId);
 
@@ -396,6 +443,9 @@ class ChatStore {
                        {
                                ...this.getApiOptions(),
 
+                               onFirstValidChunk: () => {
+                                       refreshServerPropsOnce();
+                               },
                                onChunk: (chunk: string) => {
                                        streamedContent += chunk;
                                        this.setConversationStreaming(
index 1fd4afb04022f35384e4b9501e83c3b0b4df51e8..c7056cc120eda8335461b6ac65ec02e92f66ec8e 100644 (file)
@@ -52,6 +52,7 @@ class ServerStore {
        private _error = $state<string | null>(null);
        private _serverWarning = $state<string | null>(null);
        private _slotsEndpointAvailable = $state<boolean | null>(null);
+       private fetchServerPropsPromise: Promise<void> | null = null;
 
        private readCachedServerProps(): ApiLlamaCppServerProps | null {
                if (!browser) return null;
@@ -171,73 +172,65 @@ class ServerStore {
        /**
         * Fetches server properties from the server
         */
-       async fetchServerProps(): Promise<void> {
-               this._loading = true;
-               this._error = null;
-               this._serverWarning = null;
+       async fetchServerProps(options: { silent?: boolean } = {}): Promise<void> {
+               const { silent = false } = options;
+               const isSilent = silent && this._serverProps !== null;
 
-               try {
-                       console.log('Fetching server properties...');
-                       const props = await ChatService.getServerProps();
-                       this._serverProps = props;
-                       this.persistServerProps(props);
-                       console.log('Server properties loaded:', props);
-
-                       // Check slots endpoint availability after server props are loaded
-                       await this.checkSlotsEndpointAvailability();
-               } catch (error) {
-                       const hadCachedProps = this._serverProps !== null;
-                       let errorMessage = 'Failed to connect to server';
-                       let isOfflineLikeError = false;
-                       let isServerSideError = false;
-
-                       if (error instanceof Error) {
-                               // Handle specific error types with user-friendly messages
-                               if (error.name === 'TypeError' && error.message.includes('fetch')) {
-                                       errorMessage = 'Server is not running or unreachable';
-                                       isOfflineLikeError = true;
-                               } else if (error.message.includes('ECONNREFUSED')) {
-                                       errorMessage = 'Connection refused - server may be offline';
-                                       isOfflineLikeError = true;
-                               } else if (error.message.includes('ENOTFOUND')) {
-                                       errorMessage = 'Server not found - check server address';
-                                       isOfflineLikeError = true;
-                               } else if (error.message.includes('ETIMEDOUT')) {
-                                       errorMessage = 'Request timed out - the server took too long to respond';
-                                       isOfflineLikeError = true;
-                               } else if (error.message.includes('503')) {
-                                       errorMessage = 'Server temporarily unavailable - try again shortly';
-                                       isServerSideError = true;
-                               } else if (error.message.includes('500')) {
-                                       errorMessage = 'Server error - check server logs';
-                                       isServerSideError = true;
-                               } else if (error.message.includes('404')) {
-                                       errorMessage = 'Server endpoint not found';
-                               } else if (error.message.includes('403') || error.message.includes('401')) {
-                                       errorMessage = 'Access denied';
+               if (this.fetchServerPropsPromise) {
+                       return this.fetchServerPropsPromise;
+               }
+
+               if (!isSilent) {
+                       this._loading = true;
+                       this._error = null;
+                       this._serverWarning = null;
+               }
+
+               const hadProps = this._serverProps !== null;
+
+               const fetchPromise = (async () => {
+                       try {
+                               const props = await ChatService.getServerProps();
+                               this._serverProps = props;
+                               this.persistServerProps(props);
+                               this._error = null;
+                               this._serverWarning = null;
+                               await this.checkSlotsEndpointAvailability();
+                       } catch (error) {
+                               if (isSilent && hadProps) {
+                                       console.warn('Silent server props refresh failed, keeping cached data:', error);
+                                       return;
+                               }
+
+                               this.handleFetchServerPropsError(error, hadProps);
+                       } finally {
+                               if (!isSilent) {
+                                       this._loading = false;
                                }
+
+                               this.fetchServerPropsPromise = null;
                        }
+               })();
+
+               this.fetchServerPropsPromise = fetchPromise;
+
+               await fetchPromise;
+       }
 
-                       let cachedProps: ApiLlamaCppServerProps | null = null;
+       /**
+        * Handles fetch failures by attempting to recover cached server props and
+        * updating the user-facing error or warning state appropriately.
+        */
+       private handleFetchServerPropsError(error: unknown, hadProps: boolean): void {
+               const { errorMessage, isOfflineLikeError, isServerSideError } = this.normalizeFetchError(error);
 
-                       if (!hadCachedProps) {
-                               cachedProps = this.readCachedServerProps();
-                               if (cachedProps) {
-                                       this._serverProps = cachedProps;
-                                       this._error = null;
+               let cachedProps: ApiLlamaCppServerProps | null = null;
 
-                                       if (isOfflineLikeError || isServerSideError) {
-                                               this._serverWarning = errorMessage;
-                                       }
+               if (!hadProps) {
+                       cachedProps = this.readCachedServerProps();
 
-                                       console.warn(
-                                               'Failed to refresh server properties, using cached values from localStorage:',
-                                               errorMessage
-                                       );
-                               } else {
-                                       this._error = errorMessage;
-                               }
-                       } else {
+                       if (cachedProps) {
+                               this._serverProps = cachedProps;
                                this._error = null;
 
                                if (isOfflineLikeError || isServerSideError) {
@@ -245,14 +238,66 @@ class ServerStore {
                                }
 
                                console.warn(
-                                       'Failed to refresh server properties, continuing with cached values:',
+                                       'Failed to refresh server properties, using cached values from localStorage:',
                                        errorMessage
                                );
+                       } else {
+                               this._error = errorMessage;
+                       }
+               } else {
+                       this._error = null;
+
+                       if (isOfflineLikeError || isServerSideError) {
+                               this._serverWarning = errorMessage;
                        }
-                       console.error('Error fetching server properties:', error);
-               } finally {
-                       this._loading = false;
+
+                       console.warn(
+                               'Failed to refresh server properties, continuing with cached values:',
+                               errorMessage
+                       );
                }
+
+               console.error('Error fetching server properties:', error);
+       }
+
+       private normalizeFetchError(error: unknown): {
+               errorMessage: string;
+               isOfflineLikeError: boolean;
+               isServerSideError: boolean;
+       } {
+               let errorMessage = 'Failed to connect to server';
+               let isOfflineLikeError = false;
+               let isServerSideError = false;
+
+               if (error instanceof Error) {
+                       const message = error.message || '';
+
+                       if (error.name === 'TypeError' && message.includes('fetch')) {
+                               errorMessage = 'Server is not running or unreachable';
+                               isOfflineLikeError = true;
+                       } else if (message.includes('ECONNREFUSED')) {
+                               errorMessage = 'Connection refused - server may be offline';
+                               isOfflineLikeError = true;
+                       } else if (message.includes('ENOTFOUND')) {
+                               errorMessage = 'Server not found - check server address';
+                               isOfflineLikeError = true;
+                       } else if (message.includes('ETIMEDOUT')) {
+                               errorMessage = 'Request timed out - the server took too long to respond';
+                               isOfflineLikeError = true;
+                       } else if (message.includes('503')) {
+                               errorMessage = 'Server temporarily unavailable - try again shortly';
+                               isServerSideError = true;
+                       } else if (message.includes('500')) {
+                               errorMessage = 'Server error - check server logs';
+                               isServerSideError = true;
+                       } else if (message.includes('404')) {
+                               errorMessage = 'Server endpoint not found';
+                       } else if (message.includes('403') || message.includes('401')) {
+                               errorMessage = 'Access denied';
+                       }
+               }
+
+               return { errorMessage, isOfflineLikeError, isServerSideError };
        }
 
        /**
@@ -264,6 +309,7 @@ class ServerStore {
                this._serverWarning = null;
                this._loading = false;
                this._slotsEndpointAvailable = null;
+               this.fetchServerPropsPromise = null;
                this.persistServerProps(null);
        }
 }
index 6d76ab1f68e9dc3279791a1e0058d6c4f7f9e72c..6ebc43db0e3ef72160c6c7c34894ece79a9cabdb 100644 (file)
@@ -186,6 +186,7 @@ export interface ApiChatCompletionRequest {
 }
 
 export interface ApiChatCompletionStreamChunk {
+       object?: string;
        model?: string;
        choices: Array<{
                model?: string;
index 659fb0c7d1cf5624266321f7bedd08ca3f341d8e..946ef015e92e111b3c809b77201bb3cb63d65870 100644 (file)
@@ -42,6 +42,7 @@ export interface SettingsChatServiceOptions {
        onChunk?: (chunk: string) => void;
        onReasoningChunk?: (chunk: string) => void;
        onModel?: (model: string) => void;
+       onFirstValidChunk?: () => void;
        onComplete?: (response: string, reasoningContent?: string, timings?: ChatMessageTimings) => void;
        onError?: (error: Error) => void;
 }