};
struct server_response {
+ bool running = true; // guarded by mutex_results; cleared by terminate() on shutdown
+
// for keeping track of all tasks waiting for the result
std::unordered_set<int> waiting_task_ids;
while (true) {
std::unique_lock<std::mutex> lock(mutex_results);
condition_results.wait(lock, [&]{
+ if (!running) {
+ SRV_DBG("%s : queue result stop\n", __func__);
+ std::terminate(); // we cannot return here since the caller is HTTP code
+ }
return !queue_results.empty();
});
}
std::cv_status cr_res = condition_results.wait_for(lock, std::chrono::seconds(timeout));
+ if (!running) {
+ SRV_DBG("%s : queue result stop\n", __func__);
+ std::terminate(); // we cannot return here since the caller is HTTP code
+ }
if (cr_res == std::cv_status::timeout) {
return nullptr;
}
}
}
}
+
+ // terminate the waiting loop
+ void terminate() {
+ // flip the flag while holding the lock so a waiter cannot re-check
+ // `running` and block just as the notification fires (missed wake-up)
+ {
+ std::unique_lock<std::mutex> lock(mutex_results);
+ running = false;
+ }
+ condition_results.notify_all();
+ }
};
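// --- Editor's sketch (not part of the patch) --------------------------------
// A minimal, self-contained model of the pattern introduced above: a stop flag
// re-checked inside every condition-variable wait, plus a terminate() that
// wakes all waiters. The names (result_queue, recv, recv_timeout) are
// hypothetical stand-ins; unlike the patch, recv() returns a status instead of
// calling std::terminate(), purely to keep the example runnable end to end.
#include <chrono>
#include <condition_variable>
#include <cstdio>
#include <mutex>
#include <optional>
#include <queue>

struct result_queue {
    std::mutex              mtx;
    std::condition_variable cv;
    std::queue<int>         results;
    bool                    running = true; // guarded by mtx

    // block until a result arrives; an empty optional means the queue was terminated
    std::optional<int> recv() {
        std::unique_lock<std::mutex> lock(mtx);
        cv.wait(lock, [&]{ return !running || !results.empty(); });
        if (!running) {
            return std::nullopt;
        }
        int v = results.front();
        results.pop();
        return v;
    }

    // timed variant: wait_for() can wake for three reasons (result, timeout,
    // terminate), so the stop flag must be re-checked after every wake-up;
    // this sketch collapses timeout and termination into the same empty result
    std::optional<int> recv_timeout(std::chrono::seconds timeout) {
        std::unique_lock<std::mutex> lock(mtx);
        if (!cv.wait_for(lock, timeout, [&]{ return !running || !results.empty(); })) {
            return std::nullopt; // timed out
        }
        if (!running) {
            return std::nullopt; // terminated
        }
        int v = results.front();
        results.pop();
        return v;
    }

    void send(int v) {
        {
            std::lock_guard<std::mutex> lock(mtx);
            results.push(v);
        }
        cv.notify_all();
    }

    // wake every waiter so it can observe running == false and stop waiting
    void terminate() {
        {
            std::lock_guard<std::mutex> lock(mtx);
            running = false;
        }
        cv.notify_all();
    }
};

int main() {
    result_queue q;
    q.send(42);
    if (auto v = q.recv()) {
        std::printf("got %d\n", *v);
    }
    // nothing pending: times out instead of blocking forever
    if (!q.recv_timeout(std::chrono::seconds(1))) {
        std::printf("timed out\n");
    }
    q.terminate();
    if (!q.recv()) {
        std::printf("queue terminated\n"); // returns immediately after terminate()
    }
    return 0;
}
// -----------------------------------------------------------------------------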
struct server_context {
svr->new_task_queue = [&params] { return new httplib::ThreadPool(params.n_threads_http); };
// clean up function, to be called before exit
- auto clean_up = [&svr]() {
+ auto clean_up = [&svr, &ctx_server]() {
SRV_INF("%s: cleaning up before exit...\n", __func__);
svr->stop();
+ ctx_server.queue_results.terminate();
llama_backend_free();
};
if (!ctx_server.load_model(params)) {
clean_up();
- // t.join(); // FIXME: see below
+ t.join();
LOG_ERR("%s: exiting due to model loading error\n", __func__);
return 1;
}
ctx_server.queue_tasks.start_loop();
clean_up();
- // t.join(); // FIXME: http thread may stuck if there is an on-going request. we don't need to care about this for now as the HTTP connection will already be closed at this point, but it's better to fix this
+ t.join();
return 0;
}
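// --- Editor's sketch (not part of the patch) --------------------------------
// Why the two restored t.join() calls no longer hang, in miniature: join()
// cannot return while the joined thread is parked in cv.wait(), so clean_up
// has to wake the waiters first - that is what queue_results.terminate() does
// between svr->stop() and llama_backend_free(). All names below are
// hypothetical stand-ins, not the server's own types.
#include <condition_variable>
#include <cstdio>
#include <mutex>
#include <thread>

int main() {
    std::mutex mtx;
    std::condition_variable cv;
    bool running = true; // guarded by mtx

    // stands in for the HTTP thread: a handler is stuck waiting for a result
    std::thread t([&]{
        std::unique_lock<std::mutex> lock(mtx);
        cv.wait(lock, [&]{ return !running; });
        std::printf("handler woken, HTTP thread can exit\n");
    });

    // mirrors clean_up(): flip the flag, then notify; without waking the
    // waiter like this, t.join() below would block forever
    auto clean_up = [&]() {
        {
            std::lock_guard<std::mutex> lock(mtx);
            running = false;
        }
        cv.notify_all();
    };

    clean_up();
    t.join(); // returns promptly because the waiter was woken first
    std::printf("joined cleanly\n");
    return 0;
}
// -----------------------------------------------------------------------------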