* server: tests - longer inference timeout for CI
// }
//}
- LOG("sampled token: %5d: '%s'\n", id, llama_token_to_piece(ctx_main, id).c_str());
+ //LOG("sampled token: %5d: '%s'\n", id, llama_token_to_piece(ctx_main, id).c_str());
}
}
if context.debug:
print(f"Starting checking for health for expected_health_status={expected_health_status}")
timeout = 3 # seconds
+ if expected_health_status == 'ok':
+ timeout = 10 # CI slow inference
interval = 0.5
counter = 0
async with aiohttp.ClientSession() as session:
if n_completions > 0:
return
- assert False, 'timeout exceeded'
+ assert False, f'{expected_health_status} timeout exceeded {counter}s>={timeout}'
def assert_embeddings(embeddings):