};
struct server_response {
+ bool running = true; // guarded by mutex_results; cleared by terminate() on shutdown
+
// for keeping track of all tasks waiting for the result
std::unordered_set<int> waiting_task_ids;
while (true) {
std::unique_lock<std::mutex> lock(mutex_results);
condition_results.wait(lock, [&]{
+ if (!running) {
+ SRV_DBG("%s : queue result stop\n", __func__);
+ std::terminate(); // we cannot return here since the caller is HTTP code
+ }
return !queue_results.empty();
});
}
std::cv_status cr_res = condition_results.wait_for(lock, std::chrono::seconds(timeout));
+ if (!running) {
+ SRV_DBG("%s : queue result stop\n", __func__);
+ std::terminate(); // we cannot return here since the caller is HTTP code
+ }
if (cr_res == std::cv_status::timeout) {
return nullptr;
}
}
}
}
+
+ // terminate the waiting loop
+ void terminate() {
+ // flip the flag while holding the lock so a waiter cannot re-check
+ // `running` and block just as the notification fires (missed wake-up)
+ {
+ std::unique_lock<std::mutex> lock(mutex_results);
+ running = false;
+ }
+ condition_results.notify_all();
+ }
};
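// --- Editor's sketch (not part of the patch) --------------------------------
// A minimal, self-contained model of the pattern introduced above: a stop flag
// re-checked inside every condition-variable wait, plus a terminate() that
// wakes all waiters. The names (result_queue, recv, recv_timeout) are
// hypothetical stand-ins; unlike the patch, recv() returns a status instead of
// calling std::terminate(), purely to keep the example runnable end to end.
#include <chrono>
#include <condition_variable>
#include <cstdio>
#include <mutex>
#include <optional>
#include <queue>

struct result_queue {
    std::mutex              mtx;
    std::condition_variable cv;
    std::queue<int>         results;
    bool                    running = true; // guarded by mtx

    // block until a result arrives; an empty optional means the queue was terminated
    std::optional<int> recv() {
        std::unique_lock<std::mutex> lock(mtx);
        cv.wait(lock, [&]{ return !running || !results.empty(); });
        if (!running) {
            return std::nullopt;
        }
        int v = results.front();
        results.pop();
        return v;
    }

    // timed variant: wait_for() can wake for three reasons (result, timeout,
    // terminate), so the stop flag must be re-checked after every wake-up;
    // this sketch collapses timeout and termination into the same empty result
    std::optional<int> recv_timeout(std::chrono::seconds timeout) {
        std::unique_lock<std::mutex> lock(mtx);
        if (!cv.wait_for(lock, timeout, [&]{ return !running || !results.empty(); })) {
            return std::nullopt; // timed out
        }
        if (!running) {
            return std::nullopt; // terminated
        }
        int v = results.front();
        results.pop();
        return v;
    }

    void send(int v) {
        {
            std::lock_guard<std::mutex> lock(mtx);
            results.push(v);
        }
        cv.notify_all();
    }

    // wake every waiter so it can observe running == false and stop waiting
    void terminate() {
        {
            std::lock_guard<std::mutex> lock(mtx);
            running = false;
        }
        cv.notify_all();
    }
};

int main() {
    result_queue q;
    q.send(42);
    if (auto v = q.recv()) {
        std::printf("got %d\n", *v);
    }
    // nothing pending: times out instead of blocking forever
    if (!q.recv_timeout(std::chrono::seconds(1))) {
        std::printf("timed out\n");
    }
    q.terminate();
    if (!q.recv()) {
        std::printf("queue terminated\n"); // returns immediately after terminate()
    }
    return 0;
}
// -----------------------------------------------------------------------------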
struct server_context {
svr->new_task_queue = [&params] { return new httplib::ThreadPool(params.n_threads_http); };
// clean up function, to be called before exit
- auto clean_up = [&svr]() {
+ auto clean_up = [&svr, &ctx_server]() {
SRV_INF("%s: cleaning up before exit...\n", __func__);
svr->stop();
+ ctx_server.queue_results.terminate();
llama_backend_free();
};
if (!ctx_server.load_model(params)) {
clean_up();
- // t.join(); // FIXME: see below
+ t.join();
LOG_ERR("%s: exiting due to model loading error\n", __func__);
return 1;
}
ctx_server.queue_tasks.start_loop();
clean_up();
- // t.join(); // FIXME: http thread may stuck if there is an on-going request. we don't need to care about this for now as the HTTP connection will already be closed at this point, but it's better to fix this
+ t.join();
return 0;
}
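// --- Editor's sketch (not part of the patch) --------------------------------
// Why the two restored t.join() calls no longer hang, in miniature: join()
// cannot return while the joined thread is parked in cv.wait(), so clean_up
// has to wake the waiters first - that is what queue_results.terminate() does
// between svr->stop() and llama_backend_free(). All names below are
// hypothetical stand-ins, not the server's own types.
#include <condition_variable>
#include <cstdio>
#include <mutex>
#include <thread>

int main() {
    std::mutex mtx;
    std::condition_variable cv;
    bool running = true; // guarded by mtx

    // stands in for the HTTP thread: a handler is stuck waiting for a result
    std::thread t([&]{
        std::unique_lock<std::mutex> lock(mtx);
        cv.wait(lock, [&]{ return !running; });
        std::printf("handler woken, HTTP thread can exit\n");
    });

    // mirrors clean_up(): flip the flag, then notify; without waking the
    // waiter like this, t.join() below would block forever
    auto clean_up = [&]() {
        {
            std::lock_guard<std::mutex> lock(mtx);
            running = false;
        }
        cv.notify_all();
    };

    clean_up();
    t.join(); // returns promptly because the waiter was woken first
    std::printf("joined cleanly\n");
    return 0;
}
// -----------------------------------------------------------------------------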