in tensorflow/inference/docker/build_artifacts/sagemaker/tfs_utils.py [0:0]
# Module-level dependencies assumed by this excerpt (normally imported at the
# top of tfs_utils.py). MultiModelException, retry_from_timeout, is_model_ready
# and wait_for_model_ready are defined elsewhere in the same module.
import json
import logging
import time

import requests
from requests.adapters import HTTPAdapter
from urllib3.exceptions import MaxRetryError, NewConnectionError
from urllib3.util.retry import Retry

log = logging.getLogger(__name__)
def wait_for_model(rest_port, model_name, timeout_seconds, pid=None):
    """Block until TFS reports the model as ready, retrying with backoff.

    Notice:
        The retry count calculated from timeout_seconds may introduce a small
        delta (0.1s) per retry, which can make the total wait slightly longer
        than timeout_seconds.
    """
    tfs_url = "http://localhost:{}/v1/models/{}".format(rest_port, model_name)
    start = time.time()
    try:
        session = requests.Session()
        backoff_factor = 0.1
        # urllib3 sleeps {backoff factor} * (2 ^ ({number of retries so far} - 1)) between retries
        retry_count = retry_from_timeout(timeout_seconds, backoff_factor)
        retries = Retry(total=retry_count, backoff_factor=backoff_factor)
        session.mount("http://", HTTPAdapter(max_retries=retries))
        log.info(
            "Trying to connect to model server: {} with timeout: {}s and retries: {}".format(
                tfs_url, timeout_seconds, retry_count
            )
        )
        # Each attempt gets a 0.1s timeout; retries and backoff sleeps are
        # handled by the mounted adapter.
        response = session.get(tfs_url, timeout=0.1)
        log.info(
            f"tfs response status_code: {response.status_code} with content: {json.loads(response.content)}"
        )
        end = time.time()
        if response.status_code == 200:
            if is_model_ready(response):
                return
            # The model is known to TFS but not yet loaded; keep polling with
            # the remaining time budget.
            elif wait_for_model_ready(tfs_url, timeout_seconds - int(end - start)):
                return
        raise MultiModelException(408, "Timed out after {} seconds".format(timeout_seconds), pid)
    except (
        ConnectionRefusedError,
        NewConnectionError,
        MaxRetryError,
        requests.exceptions.ConnectionError,
    ):
        raise MultiModelException(408, "Timed out after {} seconds".format(timeout_seconds), pid)
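
# A plausible sketch of the retry_from_timeout helper called above; the real
# implementation lives elsewhere in tfs_utils.py, so treat the exact logic
# (and the hypothetical attempt_timeout parameter) as assumptions. It inverts
# the backoff formula noted in the function: each retry sleeps
# backoff_factor * (2 ** (retries - 1)) and then spends up to 0.1s in the
# per-request timeout -- the per-retry "delta" the docstring warns about.
def retry_from_timeout(timeout_seconds, backoff_factor, attempt_timeout=0.1):
    """Return the smallest retry count whose worst-case wait covers timeout_seconds."""
    total = 0.0
    retries = 0
    while total < timeout_seconds:
        retries += 1
        # backoff sleep before this retry plus the per-attempt request timeout
        total += backoff_factor * (2 ** (retries - 1)) + attempt_timeout
    return retries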
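
# A minimal sketch of the is_model_ready check, assuming the standard
# TensorFlow Serving GetModelStatus REST response, e.g.
# {"model_version_status": [{"version": "1", "state": "AVAILABLE", ...}]}.
# The real helper is defined elsewhere in tfs_utils.py.
def is_model_ready(response):
    statuses = json.loads(response.content).get("model_version_status", [])
    return any(version.get("state") == "AVAILABLE" for version in statuses)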