server : test script : add timeout for all requests (#9282)

author Xuan Son Nguyen <redacted>
Mon, 2 Sep 2024 20:08:38 +0000 (22:08 +0200)
committer GitHub <redacted>
Mon, 2 Sep 2024 20:08:38 +0000 (22:08 +0200)
diff --git a/examples/server/tests/features/parallel.feature b/examples/server/tests/features/parallel.feature

index 96a96d6f8f7d30e830a4306f149c3afb21e08c2c..6cd306a2bcf7c9b5a7eb56550628c8a2e82e5f2c 100644 (file)
--- a/examples/server/tests/features/parallel.feature
+++ b/examples/server/tests/features/parallel.feature
@@ -52,8 +52,8 @@ Feature: Parallel
      Then all prompts are predicted with <n_predict> tokens
      Examples:
        | streaming | n_predict |
-      | disabled  | 200       |
-      | enabled   | 200       |
+      | disabled  | 128       |
+      | enabled   | 64        |
  
    Scenario Outline: Multi users OAI completions compatibility no v1
      Given a system prompt You are a writer.
diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py

index 1864a694fc94a6299e761cdebbd6704ff996cfa3..18daad4760e701a20acb409f87756e1c4bc073e2 100644 (file)
--- a/examples/server/tests/features/steps/steps.py
+++ b/examples/server/tests/features/steps/steps.py
@@ -23,6 +23,8 @@ from prometheus_client import parser
  
  # pyright: reportRedeclaration=false
  
+DEFAULT_TIMEOUT_SECONDS = aiohttp.ClientTimeout(total=600)
+
  @step("a server listening on {server_fqdn}:{server_port}")
  def step_server_config(context, server_fqdn: str, server_port: str):
      context.server_fqdn = server_fqdn
@@ -689,7 +691,7 @@ def step_tokenize_set_add_special(context):
  @async_run_until_complete
  async def step_tokenize(context):
      context.tokenized_text = context_text(context)
-    async with aiohttp.ClientSession() as session:
+    async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
          tokenize_args = {
              "content": context.tokenized_text,
          }
@@ -706,7 +708,7 @@ async def step_tokenize(context):
  @async_run_until_complete
  async def step_detokenize(context):
      assert len(context.tokens) > 0
-    async with aiohttp.ClientSession() as session:
+    async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
          async with session.post(f'{context.base_url}/detokenize',
                                  json={
                                      "tokens": context.tokens,
@@ -735,7 +737,7 @@ def step_strings_for_tokenization(context):
  @step('an OPTIONS request is sent from {origin}')
  @async_run_until_complete
  async def step_options_request(context, origin):
-    async with aiohttp.ClientSession() as session:
+    async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
          headers = {'Authorization': f'Bearer {context.user_api_key}', 'Origin': origin}
          async with session.options(f'{context.base_url}/v1/chat/completions',
                                      headers=headers) as response:
@@ -751,7 +753,7 @@ def step_check_options_header_value(context, cors_header, cors_header_value):
  @step('prometheus metrics are exposed')
  @async_run_until_complete
  async def step_prometheus_metrics_exported(context):
-    async with aiohttp.ClientSession() as session:
+    async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
          async with await session.get(f'{context.base_url}/metrics') as metrics_response:
              assert metrics_response.status == 200
              assert metrics_response.headers['Content-Type'] == "text/plain; version=0.0.4"
@@ -824,7 +826,7 @@ async def concurrent_requests(context, f_completion, *args, **kwargs):
  @step('the slot {slot_id:d} is saved with filename "{filename}"')
  @async_run_until_complete
  async def step_save_slot(context, slot_id, filename):
-    async with aiohttp.ClientSession() as session:
+    async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
          async with session.post(f'{context.base_url}/slots/{slot_id}?action=save',
                                  json={"filename": filename},
                                  headers={"Content-Type": "application/json"}) as response:
@@ -834,7 +836,7 @@ async def step_save_slot(context, slot_id, filename):
  @step('the slot {slot_id:d} is restored with filename "{filename}"')
  @async_run_until_complete
  async def step_restore_slot(context, slot_id, filename):
-    async with aiohttp.ClientSession() as session:
+    async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
          async with session.post(f'{context.base_url}/slots/{slot_id}?action=restore',
                                  json={"filename": filename},
                                  headers={"Content-Type": "application/json"}) as response:
@@ -844,7 +846,7 @@ async def step_restore_slot(context, slot_id, filename):
  @step('the slot {slot_id:d} is erased')
  @async_run_until_complete
  async def step_erase_slot(context, slot_id):
-    async with aiohttp.ClientSession() as session:
+    async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
          async with session.post(f'{context.base_url}/slots/{slot_id}?action=erase',
                                  headers={"Content-Type": "application/json"}) as response:
              context.response = response
@@ -853,7 +855,7 @@ async def step_erase_slot(context, slot_id):
  @step('switch {on_or_off} lora adapter {lora_id:d}')
  @async_run_until_complete
  async def toggle_lora_adapter(context, on_or_off: str, lora_id: int):
-    async with aiohttp.ClientSession() as session:
+    async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
          async with session.post(f'{context.base_url}/lora-adapters',
                                  json=[{'id': lora_id, 'scale': 1 if on_or_off == 'on' else 0}],
                                  headers={"Content-Type": "application/json"}) as response:
@@ -889,7 +891,7 @@ async def request_completion(prompt,
              print(f"Set user_api_key: {user_api_key}")
          headers['Authorization'] = f'Bearer {user_api_key}'
  
-    async with aiohttp.ClientSession() as session:
+    async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
          async with session.post(f'{base_url}/completion',
                                  json={
                                      "input_prefix": prompt_prefix,
@@ -902,8 +904,7 @@ async def request_completion(prompt,
                                      "temperature": temperature if temperature is not None else 0.8,
                                      "n_probs": 2,
                                  },
-                                headers=headers,
-                                timeout=3600) as response:
+                                headers=headers) as response:
              if expect_api_error is None or not expect_api_error:
                  assert response.status == 200
                  assert response.headers['Access-Control-Allow-Origin'] == origin
@@ -961,7 +962,7 @@ async def oai_chat_completions(user_prompt,
      if async_client:
          origin = 'llama.cpp'
          headers = {'Authorization': f'Bearer {user_api_key}', 'Origin': origin}
-        async with aiohttp.ClientSession() as session:
+        async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
              async with session.post(f'{base_url}{base_path}',
                                      json=payload,
                                      headers=headers) as response:
@@ -1048,7 +1049,7 @@ async def oai_chat_completions(user_prompt,
  
  
  async def request_embedding(content, seed, base_url=None) -> list[list[float]]:
-    async with aiohttp.ClientSession() as session:
+    async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
          async with session.post(f'{base_url}/embedding',
                                  json={
                                      "content": content,
@@ -1068,14 +1069,13 @@ async def request_oai_embeddings(input, seed,
          headers=[]
          if user_api_key is not None:
              headers = {'Authorization': f'Bearer {user_api_key}', 'Origin': origin}
-        async with aiohttp.ClientSession() as session:
+        async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
              async with session.post(f'{base_url}/v1/embeddings',
                                      json={
                                          "input": input,
                                          "model": model,
                                      },
-                                    headers=headers,
-                                    timeout=3600) as response:
+                                    headers=headers) as response:
                  assert response.status == 200, f"received status code not expected: {response.status}"
                  assert response.headers['Access-Control-Allow-Origin'] == origin
                  assert response.headers['Content-Type'] == "application/json; charset=utf-8"
@@ -1194,7 +1194,7 @@ async def wait_for_slots_status(context,
      if 'GITHUB_ACTIONS' in os.environ:
          timeout *= 2
  
-    async with aiohttp.ClientSession() as session:
+    async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
          while True:
              async with await session.get(f'{base_url}/slots', params=params) as slots_response:
                  status_code = slots_response.status
@@ -1237,7 +1237,7 @@ def assert_embeddings(embeddings):
  
  
  async def request_slots_status(context, expected_slots):
-    async with aiohttp.ClientSession() as session:
+    async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
          async with await session.get(f'{context.base_url}/slots') as slots_response:
              assert slots_response.status == 200
              slots = await slots_response.json()
author	Xuan Son Nguyen <redacted>
	Mon, 2 Sep 2024 20:08:38 +0000 (22:08 +0200)
committer	GitHub <redacted>
	Mon, 2 Sep 2024 20:08:38 +0000 (22:08 +0200)
examples/server/tests/features/parallel.feature		patch | blob | history
examples/server/tests/features/steps/steps.py		patch | blob | history