# pyright: reportRedeclaration=false
+DEFAULT_TIMEOUT_SECONDS = aiohttp.ClientTimeout(total=600)
+
@step("a server listening on {server_fqdn}:{server_port}")
def step_server_config(context, server_fqdn: str, server_port: str):
context.server_fqdn = server_fqdn
@async_run_until_complete
async def step_tokenize(context):
context.tokenized_text = context_text(context)
- async with aiohttp.ClientSession() as session:
+ async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
tokenize_args = {
"content": context.tokenized_text,
}
@async_run_until_complete
async def step_detokenize(context):
assert len(context.tokens) > 0
- async with aiohttp.ClientSession() as session:
+ async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
async with session.post(f'{context.base_url}/detokenize',
json={
"tokens": context.tokens,
@step('an OPTIONS request is sent from {origin}')
@async_run_until_complete
async def step_options_request(context, origin):
- async with aiohttp.ClientSession() as session:
+ async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
headers = {'Authorization': f'Bearer {context.user_api_key}', 'Origin': origin}
async with session.options(f'{context.base_url}/v1/chat/completions',
headers=headers) as response:
@step('prometheus metrics are exposed')
@async_run_until_complete
async def step_prometheus_metrics_exported(context):
- async with aiohttp.ClientSession() as session:
+ async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
async with await session.get(f'{context.base_url}/metrics') as metrics_response:
assert metrics_response.status == 200
assert metrics_response.headers['Content-Type'] == "text/plain; version=0.0.4"
@step('the slot {slot_id:d} is saved with filename "{filename}"')
@async_run_until_complete
async def step_save_slot(context, slot_id, filename):
- async with aiohttp.ClientSession() as session:
+ async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
async with session.post(f'{context.base_url}/slots/{slot_id}?action=save',
json={"filename": filename},
headers={"Content-Type": "application/json"}) as response:
@step('the slot {slot_id:d} is restored with filename "{filename}"')
@async_run_until_complete
async def step_restore_slot(context, slot_id, filename):
- async with aiohttp.ClientSession() as session:
+ async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
async with session.post(f'{context.base_url}/slots/{slot_id}?action=restore',
json={"filename": filename},
headers={"Content-Type": "application/json"}) as response:
@step('the slot {slot_id:d} is erased')
@async_run_until_complete
async def step_erase_slot(context, slot_id):
- async with aiohttp.ClientSession() as session:
+ async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
async with session.post(f'{context.base_url}/slots/{slot_id}?action=erase',
headers={"Content-Type": "application/json"}) as response:
context.response = response
@step('switch {on_or_off} lora adapter {lora_id:d}')
@async_run_until_complete
async def toggle_lora_adapter(context, on_or_off: str, lora_id: int):
- async with aiohttp.ClientSession() as session:
+ async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
async with session.post(f'{context.base_url}/lora-adapters',
json=[{'id': lora_id, 'scale': 1 if on_or_off == 'on' else 0}],
headers={"Content-Type": "application/json"}) as response:
print(f"Set user_api_key: {user_api_key}")
headers['Authorization'] = f'Bearer {user_api_key}'
- async with aiohttp.ClientSession() as session:
+ async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
async with session.post(f'{base_url}/completion',
json={
"input_prefix": prompt_prefix,
"temperature": temperature if temperature is not None else 0.8,
"n_probs": 2,
},
- headers=headers,
- timeout=3600) as response:
+ headers=headers) as response:
if expect_api_error is None or not expect_api_error:
assert response.status == 200
assert response.headers['Access-Control-Allow-Origin'] == origin
if async_client:
origin = 'llama.cpp'
headers = {'Authorization': f'Bearer {user_api_key}', 'Origin': origin}
- async with aiohttp.ClientSession() as session:
+ async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
async with session.post(f'{base_url}{base_path}',
json=payload,
headers=headers) as response:
async def request_embedding(content, seed, base_url=None) -> list[list[float]]:
- async with aiohttp.ClientSession() as session:
+ async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
async with session.post(f'{base_url}/embedding',
json={
"content": content,
headers=[]
if user_api_key is not None:
headers = {'Authorization': f'Bearer {user_api_key}', 'Origin': origin}
- async with aiohttp.ClientSession() as session:
+ async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
async with session.post(f'{base_url}/v1/embeddings',
json={
"input": input,
"model": model,
},
- headers=headers,
- timeout=3600) as response:
+ headers=headers) as response:
assert response.status == 200, f"received status code not expected: {response.status}"
assert response.headers['Access-Control-Allow-Origin'] == origin
assert response.headers['Content-Type'] == "application/json; charset=utf-8"
if 'GITHUB_ACTIONS' in os.environ:
timeout *= 2
- async with aiohttp.ClientSession() as session:
+ async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
while True:
async with await session.get(f'{base_url}/slots', params=params) as slots_response:
status_code = slots_response.status
async def request_slots_status(context, expected_slots):
- async with aiohttp.ClientSession() as session:
+ async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
async with await session.get(f'{context.base_url}/slots') as slots_response:
assert slots_response.status == 200
slots = await slots_response.json()