in llm_swarm/__init__.py [0:0]
def get_endpoints(endpoint_path: str, instances: int = 1, job_ids: Optional[List[str]] = None) -> List[str]:
    """Read endpoints from a file and wait until each one is reachable.

    The file at ``endpoint_path`` is polled once per second until it can be
    opened and contains exactly ``instances`` lines (one endpoint per line).
    Each endpoint is then polled at ``{endpoint}/health`` until an HTTP
    response is received; the response status code is not inspected.

    Between retries ``make_sure_jobs_are_still_running(job_ids)`` is called
    (defined elsewhere; presumably it aborts when the jobs have died --
    verify against its definition).

    Args:
        endpoint_path (str): path to the file containing one endpoint per line
        instances (int, optional): expected number of endpoints. Defaults to 1.
        job_ids (Optional[List[str]], optional): job ids forwarded to
            ``make_sure_jobs_are_still_running`` on every retry. Defaults to None.

    Returns:
        List[str]: list of endpoints (e.g. ["http://26.0.154.245:13120"])
    """
    endpoints: List[str] = []
    with Loader(f"Waiting for {endpoint_path} to be created"):
        while True:
            try:
                # Context manager closes the handle on every polling attempt
                # instead of leaking one file descriptor per retry.
                with open(endpoint_path) as endpoint_file:
                    endpoints = endpoint_file.read().splitlines()
                # Explicit comparison rather than `assert`: asserts are stripped
                # under `python -O`, which would accept an empty/partial file.
                # A mismatch happens when we read while slurm is still writing.
                file_ready = len(endpoints) == instances
            except OSError:
                # File does not exist (or is not readable) yet.
                file_ready = False
            if file_ready:
                break
            make_sure_jobs_are_still_running(job_ids)
            sleep(1)
    print("obtained endpoints", endpoints)
    for endpoint in endpoints:
        with Loader(f"Waiting for {endpoint} to be reachable"):
            while True:
                try:
                    get_session().get(f"{endpoint}/health")
                except requests.exceptions.ConnectionError:
                    # Server not accepting connections yet; check the jobs and retry.
                    make_sure_jobs_are_still_running(job_ids)
                    sleep(1)
                else:
                    print(f"\nConnected to {endpoint}")
                    break
    return endpoints