git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
Allow viewing conversations even when llama server is down (#16255)
author Aleksander Grygier <redacted>
Fri, 26 Sep 2025 16:35:42 +0000 (18:35 +0200)
committer GitHub <redacted>
Fri, 26 Sep 2025 16:35:42 +0000 (18:35 +0200)
* webui: allow viewing conversations and sending messages even if llama-server is down

- Cached llama.cpp server properties in browser localStorage on startup, persisting successful fetches and reloading them when a refresh attempt fails, so the chat UI continues to render while the backend is unavailable (see the sketch after this list).
- Cleared the stored server properties when resetting the store to prevent stale capability data after cache-backed operation.
- Kept the original error-splash behavior when no cached props exist so fresh installs still surface a clear failure state instead of rendering stale data.
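
A minimal TypeScript sketch of the caching pattern, condensed from the server store changes in the diff below (the key and helper names match the new code; the `ApiLlamaCppServerProps` type comes from the webui codebase, and the Svelte store plumbing plus error handling are omitted here):

	// Cache key shared with the new constants file
	const SERVER_PROPS_LOCALSTORAGE_KEY = 'LlamaCppWebui.serverProps';

	// Persist a successful /props fetch, or drop the cache when passed null
	function persistServerProps(props: ApiLlamaCppServerProps | null): void {
		if (props) {
			localStorage.setItem(SERVER_PROPS_LOCALSTORAGE_KEY, JSON.stringify(props));
		} else {
			localStorage.removeItem(SERVER_PROPS_LOCALSTORAGE_KEY);
		}
	}

	// Reload the cached props when a refresh attempt fails
	function readCachedServerProps(): ApiLlamaCppServerProps | null {
		const raw = localStorage.getItem(SERVER_PROPS_LOCALSTORAGE_KEY);
		return raw ? (JSON.parse(raw) as ApiLlamaCppServerProps) : null;
	}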

* feat: Add UI for `props` endpoint unavailable + cleanup logic

* webui: extend cached props fallback to offline errors

Treat connection failures (refused, DNS, timeout, fetch) the same way as
server 5xx errors so the warning banner shows up when cached props are
available, instead of falling back to a full error screen.
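
For reference, a sketch of that classification, mirroring the error matching added to fetchServerProps() in the diff below (the helper name isCacheFallbackEligible is hypothetical; the real code sets isOfflineLikeError / isServerSideError flags inline and still maps 404/401/403 to plain errors):

	// True when a /props failure should fall back to cached props with a
	// warning banner instead of the full error splash.
	function isCacheFallbackEligible(error: Error): boolean {
		const offlineLike =
			(error.name === 'TypeError' && error.message.includes('fetch')) ||
			error.message.includes('ECONNREFUSED') ||
			error.message.includes('ENOTFOUND') ||
			error.message.includes('ETIMEDOUT');
		const serverSide = error.message.includes('503') || error.message.includes('500');
		return offlineLike || serverSide;
	}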

* webui: leave the chat form enabled while a server warning is shown so operators can keep sending messages

e.g. to restart the backend via llama-swap, even while cached /props data is in use

* chore: update webui build output

---------

Co-authored-by: Pascal <redacted>
tools/server/public/index.html.gz
tools/server/webui/src/lib/components/app/chat/ChatScreen/ChatScreen.svelte
tools/server/webui/src/lib/components/app/chat/ChatScreen/ChatScreenWarning.svelte [new file with mode: 0644]
tools/server/webui/src/lib/components/app/index.ts
tools/server/webui/src/lib/constants/localstorage-keys.ts [new file with mode: 0644]
tools/server/webui/src/lib/stores/server.svelte.ts
tools/server/webui/src/lib/utils/api-key-validation.ts

index f12ff3e62e4aa5cbcc060e7366ea2827bbbd358d..53c6a9b5cfb525d99241072c8cbbd511a02f6793 100644 (file)
Binary files a/tools/server/public/index.html.gz and b/tools/server/public/index.html.gz differ
index 17215b713a5cb2ab171049e2a6468d02935e459a..666febf0d28d68d6444850fafabd63342516e2ee 100644 (file)
@@ -3,9 +3,11 @@
        import {
                ChatForm,
                ChatScreenHeader,
+               ChatScreenWarning,
                ChatMessages,
                ChatProcessingInfo,
                EmptyFileAlertDialog,
+               ServerErrorSplash,
                ServerInfo,
                ServerLoadingSplash,
                ConfirmationDialog
@@ -29,6 +31,7 @@
                supportsVision,
                supportsAudio,
                serverLoading,
+               serverWarning,
                serverStore
        } from '$lib/stores/server.svelte';
        import { contextService } from '$lib/services';
                >
                        <ChatProcessingInfo />
 
+                       {#if serverWarning()}
+                               <ChatScreenWarning class="pointer-events-auto mx-auto max-w-[48rem] px-4" />
+                       {/if}
+
                        <div class="conversation-chat-form pointer-events-auto rounded-t-3xl pb-4">
                                <ChatForm
                                        isLoading={isLoading()}
 {:else if isServerLoading}
        <!-- Server Loading State -->
        <ServerLoadingSplash />
+{:else if serverStore.error && !serverStore.modelName}
+       <ServerErrorSplash error={serverStore.error} />
 {:else if serverStore.modelName}
        <div
                aria-label="Welcome screen with file drop zone"
                                <ServerInfo />
                        </div>
 
+                       {#if serverWarning()}
+                               <ChatScreenWarning />
+                       {/if}
+
                        <div in:fly={{ y: 10, duration: 250, delay: 300 }}>
                                <ChatForm
                                        isLoading={isLoading()}
diff --git a/tools/server/webui/src/lib/components/app/chat/ChatScreen/ChatScreenWarning.svelte b/tools/server/webui/src/lib/components/app/chat/ChatScreen/ChatScreenWarning.svelte
new file mode 100644 (file)
index 0000000..8b8d916
--- /dev/null
@@ -0,0 +1,38 @@
+<script lang="ts">
+       import { AlertTriangle, RefreshCw } from '@lucide/svelte';
+       import { serverLoading, serverStore } from '$lib/stores/server.svelte';
+       import { fly } from 'svelte/transition';
+
+       interface Props {
+               class?: string;
+       }
+
+       let { class: className = '' }: Props = $props();
+
+       function handleRefreshServer() {
+               serverStore.fetchServerProps();
+       }
+</script>
+
+<div class="mb-3 {className}" in:fly={{ y: 10, duration: 250 }}>
+       <div
+               class="rounded-md border border-yellow-200 bg-yellow-50 px-3 py-2 dark:border-yellow-800 dark:bg-yellow-950"
+       >
+               <div class="flex items-center justify-between">
+                       <div class="flex items-center">
+                               <AlertTriangle class="h-4 w-4 text-yellow-600 dark:text-yellow-400" />
+                               <p class="ml-2 text-sm text-yellow-800 dark:text-yellow-200">
+                                       Server `/props` endpoint not available - using cached data
+                               </p>
+                       </div>
+                       <button
+                               onclick={handleRefreshServer}
+                               disabled={serverLoading()}
+                               class="ml-3 flex items-center gap-1.5 rounded bg-yellow-100 px-2 py-1 text-xs font-medium text-yellow-800 hover:bg-yellow-200 disabled:opacity-50 dark:bg-yellow-900 dark:text-yellow-200 dark:hover:bg-yellow-800"
+                       >
+                               <RefreshCw class="h-3 w-3 {serverLoading() ? 'animate-spin' : ''}" />
+                               {serverLoading() ? 'Checking...' : 'Retry'}
+                       </button>
+               </div>
+       </div>
+</div>
index 2f559bd623a519801a0f488cd60fa38ccbd405f5..7c1af27ecd3fd47ff65d5973d82f75f7b6dcf6db 100644 (file)
@@ -19,6 +19,7 @@ export { default as MessageBranchingControls } from './chat/ChatMessages/ChatMes
 export { default as ChatProcessingInfo } from './chat/ChatProcessingInfo.svelte';
 
 export { default as ChatScreenHeader } from './chat/ChatScreen/ChatScreenHeader.svelte';
+export { default as ChatScreenWarning } from './chat/ChatScreen/ChatScreenWarning.svelte';
 export { default as ChatScreen } from './chat/ChatScreen/ChatScreen.svelte';
 
 export { default as ChatSettingsDialog } from './chat/ChatSettings/ChatSettingsDialog.svelte';
diff --git a/tools/server/webui/src/lib/constants/localstorage-keys.ts b/tools/server/webui/src/lib/constants/localstorage-keys.ts
new file mode 100644 (file)
index 0000000..9fcc7ba
--- /dev/null
@@ -0,0 +1 @@
+export const SERVER_PROPS_LOCALSTORAGE_KEY = 'LlamaCppWebui.serverProps';
index 1b587c064425c1f3b64b230be1ae24cfa953b0f1..a8624562d1c5f43343836d1cd33e70d25c195d97 100644 (file)
@@ -1,3 +1,5 @@
+import { browser } from '$app/environment';
+import { SERVER_PROPS_LOCALSTORAGE_KEY } from '$lib/constants/localstorage-keys';
 import { ChatService } from '$lib/services/chat';
 import { config } from '$lib/stores/settings.svelte';
 
@@ -34,12 +36,51 @@ import { config } from '$lib/stores/settings.svelte';
  * - Slots endpoint availability (for processing state monitoring)
  * - Context window size and token limits
  */
+
 class ServerStore {
+       constructor() {
+               if (!browser) return;
+
+               const cachedProps = this.readCachedServerProps();
+               if (cachedProps) {
+                       this._serverProps = cachedProps;
+               }
+       }
+
        private _serverProps = $state<ApiLlamaCppServerProps | null>(null);
        private _loading = $state(false);
        private _error = $state<string | null>(null);
+       private _serverWarning = $state<string | null>(null);
        private _slotsEndpointAvailable = $state<boolean | null>(null);
 
+       private readCachedServerProps(): ApiLlamaCppServerProps | null {
+               if (!browser) return null;
+
+               try {
+                       const raw = localStorage.getItem(SERVER_PROPS_LOCALSTORAGE_KEY);
+                       if (!raw) return null;
+
+                       return JSON.parse(raw) as ApiLlamaCppServerProps;
+               } catch (error) {
+                       console.warn('Failed to read cached server props from localStorage:', error);
+                       return null;
+               }
+       }
+
+       private persistServerProps(props: ApiLlamaCppServerProps | null): void {
+               if (!browser) return;
+
+               try {
+                       if (props) {
+                               localStorage.setItem(SERVER_PROPS_LOCALSTORAGE_KEY, JSON.stringify(props));
+                       } else {
+                               localStorage.removeItem(SERVER_PROPS_LOCALSTORAGE_KEY);
+                       }
+               } catch (error) {
+                       console.warn('Failed to persist server props to localStorage:', error);
+               }
+       }
+
        get serverProps(): ApiLlamaCppServerProps | null {
                return this._serverProps;
        }
@@ -52,6 +93,10 @@ class ServerStore {
                return this._error;
        }
 
+       get serverWarning(): string | null {
+               return this._serverWarning;
+       }
+
        get modelName(): string | null {
                if (!this._serverProps?.model_path) return null;
                return this._serverProps.model_path.split(/(\\|\/)/).pop() || null;
@@ -123,30 +168,43 @@ class ServerStore {
        async fetchServerProps(): Promise<void> {
                this._loading = true;
                this._error = null;
+               this._serverWarning = null;
 
                try {
                        console.log('Fetching server properties...');
                        const props = await ChatService.getServerProps();
                        this._serverProps = props;
+                       this.persistServerProps(props);
                        console.log('Server properties loaded:', props);
 
                        // Check slots endpoint availability after server props are loaded
                        await this.checkSlotsEndpointAvailability();
                } catch (error) {
+                       const hadCachedProps = this._serverProps !== null;
                        let errorMessage = 'Failed to connect to server';
+                       let isOfflineLikeError = false;
+                       let isServerSideError = false;
 
                        if (error instanceof Error) {
                                // Handle specific error types with user-friendly messages
                                if (error.name === 'TypeError' && error.message.includes('fetch')) {
                                        errorMessage = 'Server is not running or unreachable';
+                                       isOfflineLikeError = true;
                                } else if (error.message.includes('ECONNREFUSED')) {
                                        errorMessage = 'Connection refused - server may be offline';
+                                       isOfflineLikeError = true;
                                } else if (error.message.includes('ENOTFOUND')) {
                                        errorMessage = 'Server not found - check server address';
+                                       isOfflineLikeError = true;
                                } else if (error.message.includes('ETIMEDOUT')) {
                                        errorMessage = 'Connection timeout - server may be overloaded';
+                                       isOfflineLikeError = true;
+                               } else if (error.message.includes('503')) {
+                                       errorMessage = 'Server temporarily unavailable - try again shortly';
+                                       isServerSideError = true;
                                } else if (error.message.includes('500')) {
                                        errorMessage = 'Server error - check server logs';
+                                       isServerSideError = true;
                                } else if (error.message.includes('404')) {
                                        errorMessage = 'Server endpoint not found';
                                } else if (error.message.includes('403') || error.message.includes('401')) {
@@ -154,7 +212,37 @@ class ServerStore {
                                }
                        }
 
-                       this._error = errorMessage;
+                       let cachedProps: ApiLlamaCppServerProps | null = null;
+
+                       if (!hadCachedProps) {
+                               cachedProps = this.readCachedServerProps();
+                               if (cachedProps) {
+                                       this._serverProps = cachedProps;
+                                       this._error = null;
+
+                                       if (isOfflineLikeError || isServerSideError) {
+                                               this._serverWarning = errorMessage;
+                                       }
+
+                                       console.warn(
+                                               'Failed to refresh server properties, using cached values from localStorage:',
+                                               errorMessage
+                                       );
+                               } else {
+                                       this._error = errorMessage;
+                               }
+                       } else {
+                               this._error = null;
+
+                               if (isOfflineLikeError || isServerSideError) {
+                                       this._serverWarning = errorMessage;
+                               }
+
+                               console.warn(
+                                       'Failed to refresh server properties, continuing with cached values:',
+                                       errorMessage
+                               );
+                       }
                        console.error('Error fetching server properties:', error);
                } finally {
                        this._loading = false;
@@ -167,8 +255,10 @@ class ServerStore {
        clear(): void {
                this._serverProps = null;
                this._error = null;
+               this._serverWarning = null;
                this._loading = false;
                this._slotsEndpointAvailable = null;
+               this.persistServerProps(null);
        }
 }
 
@@ -177,6 +267,7 @@ export const serverStore = new ServerStore();
 export const serverProps = () => serverStore.serverProps;
 export const serverLoading = () => serverStore.loading;
 export const serverError = () => serverStore.error;
+export const serverWarning = () => serverStore.serverWarning;
 export const modelName = () => serverStore.modelName;
 export const supportedModalities = () => serverStore.supportedModalities;
 export const supportsVision = () => serverStore.supportsVision;
index a08e21b3938ae6d7dbd7d4ea0f9fb2fed806dd0d..0652467b8c90e6cc1740e19123d32c0466a85103 100644 (file)
@@ -27,11 +27,10 @@ export async function validateApiKey(fetch: typeof globalThis.fetch): Promise<vo
                if (!response.ok) {
                        if (response.status === 401 || response.status === 403) {
                                throw error(401, 'Access denied');
-                       } else if (response.status >= 500) {
-                               throw error(response.status, 'Server error - check if llama.cpp server is running');
-                       } else {
-                               throw error(response.status, `Server responded with status ${response.status}`);
                        }
+
+                       console.warn(`Server responded with status ${response.status} during API key validation`);
+                       return;
                }
        } catch (err) {
                // If it's already a SvelteKit error, re-throw it
@@ -40,6 +39,6 @@ export async function validateApiKey(fetch: typeof globalThis.fetch): Promise<vo
                }
 
                // Network or other errors
-               throw error(503, 'Cannot connect to server - check if llama.cpp server is running');
+               console.warn('Cannot connect to server for API key validation:', err);
        }
 }