server: Introduce LLAMA_BUILD_WEBUI build flag to allow disabling the embedded web...
author Kusha Gharahi <redacted>
Fri, 27 Mar 2026 16:25:55 +0000 (11:25 -0500)
committer GitHub <redacted>
Fri, 27 Mar 2026 16:25:55 +0000 (17:25 +0100)
* introduce LLAMA_SERVER_NO_WEBUI

* LLAMA_SERVER_NO_WEBUI → LLAMA_BUILD_WEBUI

* LLAMA_BUILD_WEBUI ON by default not based on LLAMA_STANDALONE

* Missed this

* Add useWebUi to package.nix

.devops/nix/package.nix
CMakeLists.txt
tools/server/CMakeLists.txt
tools/server/server-http.cpp
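
The new flag is an ordinary CMake option, so it can be toggled at configure time. A minimal sketch of an opt-out build (the build directory name and the llama-server target name are assumptions, not shown in this diff):

    # configure and build the server without the embedded Web UI
    cmake -B build -DLLAMA_BUILD_SERVER=ON -DLLAMA_BUILD_WEBUI=OFF
    cmake --build build --target llama-server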

diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix
index 79a7270e5d83ca5535787f256cf5dd78d7687082..289273e72e503609a856becc6797288db1481ea7 100644
@@ -41,6 +41,7 @@
   effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv,
   enableStatic ? effectiveStdenv.hostPlatform.isStatic,
   precompileMetalShaders ? false,
+  useWebUi ? true,
 }:
 
 let
@@ -164,6 +165,7 @@ effectiveStdenv.mkDerivation (finalAttrs: {
   cmakeFlags =
     [
       (cmakeBool "LLAMA_BUILD_SERVER" true)
+      (cmakeBool "LLAMA_BUILD_WEBUI" useWebUi)
       (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
       (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
       (cmakeBool "GGML_NATIVE" false)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 69da97dc1e449f633ec8f03479b1e356ba157f85..caea48c5060d2eead5b45d3b477361c4a013902a 100644
@@ -108,6 +108,7 @@ option(LLAMA_BUILD_TESTS    "llama: build tests"          ${LLAMA_STANDALONE})
 option(LLAMA_BUILD_TOOLS    "llama: build tools"          ${LLAMA_STANDALONE})
 option(LLAMA_BUILD_EXAMPLES "llama: build examples"       ${LLAMA_STANDALONE})
 option(LLAMA_BUILD_SERVER   "llama: build server example" ${LLAMA_STANDALONE})
+option(LLAMA_BUILD_WEBUI    "llama: build the embedded Web UI for server"  ON)
 option(LLAMA_TOOLS_INSTALL  "llama: install tools"        ${LLAMA_TOOLS_INSTALL_DEFAULT})
 option(LLAMA_TESTS_INSTALL  "llama: install tests"        ON)
 
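Note that, unlike its neighbors, the new option defaults to ON rather than ${LLAMA_STANDALone} — per the commit message, it is "ON by default not based on LLAMA_STANDALONE" — so projects embedding llama.cpp still get the Web UI unless they opt out. A hedged sketch of opting out from a parent CMakeLists.txt (directory layout assumed):

    # parent project consuming llama.cpp as a subdirectory
    set(LLAMA_BUILD_WEBUI OFF CACHE BOOL "" FORCE)
    add_subdirectory(llama.cpp)
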
diff --git a/tools/server/CMakeLists.txt b/tools/server/CMakeLists.txt
index fc4cb5dcc8acd466ccd2a586be6c906b7111fe0a..a72b4864098c462a0097c5efd37a1e085eeff23f 100644
@@ -37,22 +37,29 @@ set(TARGET_SRCS
     server-models.cpp
     server-models.h
 )
-set(PUBLIC_ASSETS
-    index.html.gz
-    loading.html
-)
 
-foreach(asset ${PUBLIC_ASSETS})
-    set(input "${CMAKE_CURRENT_SOURCE_DIR}/public/${asset}")
-    set(output "${CMAKE_CURRENT_BINARY_DIR}/${asset}.hpp")
-    list(APPEND TARGET_SRCS ${output})
-    add_custom_command(
-        DEPENDS "${input}"
-        OUTPUT "${output}"
-        COMMAND "${CMAKE_COMMAND}" "-DINPUT=${input}" "-DOUTPUT=${output}" -P "${PROJECT_SOURCE_DIR}/scripts/xxd.cmake"
+option(LLAMA_BUILD_WEBUI "Build the embedded Web UI" ON)
+
+if (LLAMA_BUILD_WEBUI)
+    set(PUBLIC_ASSETS
+        index.html.gz
+        loading.html
     )
-    set_source_files_properties(${output} PROPERTIES GENERATED TRUE)
-endforeach()
+
+    foreach(asset ${PUBLIC_ASSETS})
+        set(input "${CMAKE_CURRENT_SOURCE_DIR}/public/${asset}")
+        set(output "${CMAKE_CURRENT_BINARY_DIR}/${asset}.hpp")
+        list(APPEND TARGET_SRCS ${output})
+        add_custom_command(
+            DEPENDS "${input}"
+            OUTPUT "${output}"
+            COMMAND "${CMAKE_COMMAND}" "-DINPUT=${input}" "-DOUTPUT=${output}" -P "${PROJECT_SOURCE_DIR}/scripts/xxd.cmake"
+        )
+        set_source_files_properties(${output} PROPERTIES GENERATED TRUE)
+    endforeach()
+    add_definitions(-DLLAMA_BUILD_WEBUI)
+else()
+endif()
 
 add_executable(${TARGET} ${TARGET_SRCS})
 install(TARGETS ${TARGET} RUNTIME)
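
For context: scripts/xxd.cmake turns each public asset into a C++ header holding its raw bytes, which is why the foreach loop above can be skipped wholesale when the UI is disabled. The shape below is an illustrative sketch inferred from how server-http.cpp consumes the symbols (loading_html, loading_html_len); the exact output of xxd.cmake may differ:

    // loading.html.hpp (illustrative, not verbatim generator output)
    unsigned char loading_html[]   = { 0x3c, 0x21, /* ...raw bytes of loading.html... */ };
    unsigned int  loading_html_len = sizeof(loading_html);
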
diff --git a/tools/server/server-http.cpp b/tools/server/server-http.cpp
index 3466512d0c3d2b7bf448f6c26234695316a1face..429cddcc2e82b9ab5cbe65411bf99715a8dd002d 100644
@@ -8,9 +8,11 @@
 #include <string>
 #include <thread>
 
+#ifdef LLAMA_BUILD_WEBUI
 // auto generated files (see README.md for details)
 #include "index.html.gz.hpp"
 #include "loading.html.hpp"
+#endif
 
 //
 // HTTP implementation using cpp-httplib
@@ -181,11 +183,14 @@ bool server_http_context::init(const common_params & params) {
     auto middleware_server_state = [this](const httplib::Request & req, httplib::Response & res) {
         bool ready = is_ready.load();
         if (!ready) {
+#ifdef LLAMA_BUILD_WEBUI
             auto tmp = string_split<std::string>(req.path, '.');
             if (req.path == "/" || tmp.back() == "html") {
                 res.status = 503;
                 res.set_content(reinterpret_cast<const char*>(loading_html), loading_html_len, "text/html; charset=utf-8");
-            } else {
+            } else
+#endif
+            {
                 // no endpoints is allowed to be accessed when the server is not ready
                 // this is to prevent any data races or inconsistent states
                 res.status = 503;
@@ -255,6 +260,7 @@ bool server_http_context::init(const common_params & params) {
                 return 1;
             }
         } else {
+#ifdef LLAMA_BUILD_WEBUI
             // using embedded static index.html
             srv->Get(params.api_prefix + "/", [](const httplib::Request & req, httplib::Response & res) {
                 if (req.get_header_value("Accept-Encoding").find("gzip") == std::string::npos) {
@@ -268,6 +274,7 @@ bool server_http_context::init(const common_params & params) {
                 }
                 return false;
             });
+#endif
         }
     }
     return true;
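
With LLAMA_BUILD_WEBUI=OFF, the asset headers are never generated or included and the "/" route is simply not registered, so only the API endpoints respond. A quick smoke test of a build (host, port, and endpoint availability assumed):

    # served only when the Web UI was compiled in
    curl -i http://localhost:8080/
    # API endpoints are unaffected by the flag
    curl -s http://localhost:8080/health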