slot_params defaults;
defaults.sampling = params_base.sampling;
defaults.speculative = params_base.speculative;
+ defaults.n_keep = params_base.n_keep;
// enabling this will output extra debug information in the HTTP responses from the server
params.verbose = params_base.verbosity > 9;
SLT_INF(slot, "new slot n_ctx_slot = %d\n", slot.n_ctx);
slot.params.sampling = params_base.sampling;
+ slot.params.n_keep = params_base.n_keep;
slot.callback_on_release = [this](int) {
queue_tasks.pop_deferred_task();