### GET `/slots`: Returns the current slots processing state
-This endpoint can be disabled with `--no-slots`
+> [!WARNING]
+> This endpoint is intended for debugging and may be modified in future versions. For security reasons, we strongly advise against enabling it in production environments.
+
+This endpoint is disabled by default and can be enabled with `--slots`.
If the query parameter `?fail_on_no_slot=1` is set, this endpoint will respond with status code 503 when there are no available slots.
"grammar": "",
"id": 0,
"ignore_eos": false,
+ "is_processing": false,
"logit_bias": [],
"min_p": 0.05000000074505806,
"mirostat": 0,
"temperature"
],
"seed": 42,
- "state": 1,
"stop": [
"\n"
],
]
```
-Possible values for `slot[i].state` are:
-- `0`: SLOT_STATE_IDLE
-- `1`: SLOT_STATE_PROCESSING
-
### GET `/metrics`: Prometheus compatible metrics exporter
This endpoint is only accessible if `--metrics` is set.
for (server_slot & slot : slots) {
json slot_data = get_formated_generation(slot);
- slot_data["id"] = slot.id;
- slot_data["id_task"] = slot.id_task;
- slot_data["state"] = slot.state;
- slot_data["prompt"] = common_detokenize(ctx, slot.prompt_tokens);
- slot_data["next_token"] = {
+ slot_data["id"] = slot.id;
+ slot_data["id_task"] = slot.id_task;
+ slot_data["is_processing"] = slot.is_processing();
+ slot_data["prompt"] = common_detokenize(ctx, slot.prompt_tokens);
+ slot_data["next_token"] = {
{"has_next_token", slot.has_next_token},
{"has_new_line", slot.has_new_line},
{"n_remain", slot.n_remaining},
{"stopping_word", slot.stopping_word},
};
- if (slot_data["state"] == SLOT_STATE_IDLE) {
- n_idle_slots++;
- } else {
+ if (slot.is_processing()) {
n_processing_slots++;
+ } else {
+ n_idle_slots++;
}
slots_data.push_back(slot_data);
async def step_all_slots_status(context, expected_slot_status_string: Literal['idle', 'busy'] | str):
match expected_slot_status_string:
case 'idle':
- expected_slot_status = 0
+ expected_slot_status = False
case 'busy':
- expected_slot_status = 1
+ expected_slot_status = True
case _:
assert False, "unknown status"
- expected_slots = [{'id': slot_id, 'state': expected_slot_status}
+ expected_slots = [{'id': slot_id, 'is_processing': expected_slot_status}
for slot_id in range(context.n_slots)]
await request_slots_status(context, expected_slots)
if status_code == 503 and status_code == expected_http_status_code:
return
if status_code == 200 and status_code == expected_http_status_code:
- n_slots_idle = sum(1 if slot["state"] == 0 else 0 for slot in slots)
- n_slots_processing = sum(1 if slot["state"] != 0 else 0 for slot in slots)
+ n_slots_idle = sum(1 if not slot["is_processing"] else 0 for slot in slots)
+ n_slots_processing = sum(1 if slot["is_processing"] else 0 for slot in slots)
if ((slots_idle is None or slots_idle == n_slots_idle)
and (slots_processing is None or slots_processing == n_slots_processing)):
return