// apply context-shift if needed
// TODO: simplify and improve
for (server_slot & slot : slots) {
- if (slot.is_processing() && slot.prompt.n_tokens() + 1 >= slot.n_ctx) {
+ if (slot.state == SLOT_STATE_GENERATING && slot.prompt.n_tokens() + 1 >= slot.n_ctx) {
if (!params_base.ctx_shift) {
// this check is redundant (kept for good measure)
// we should never get here, because generation should have already been stopped in process_token()