in tfx/components/infra_validator/model_server_runners/kubernetes_runner.py [0:0]
def WaitUntilRunning(self, deadline: float) -> None:
    """Polls the pod until it is running with an IP, or the deadline passes.

    On success, stores the serving endpoint in `self._endpoint`, formatted as
    `<pod_ip>:<container_port>`.

    Args:
      deadline: Absolute time, on the `time.time()` clock, after which polling
        stops.

    Raises:
      AssertionError: If `self._pod_name` is not set.
      error_types.JobAborted: If the pod phase reports `is_done` before the pod
        was observed running.
      error_types.DeadlineExceeded: If the deadline passes before the pod is
        observed running with an IP.
    """
    # Raised explicitly instead of using an `assert` statement so the check is
    # not stripped when Python runs with -O.
    if not self._pod_name:
        raise AssertionError(
            'Pod has not been created yet. You should call Start() first.')

    while time.time() < deadline:
        try:
            pod = self._k8s_core_api.read_namespaced_pod(
                name=self._pod_name,
                namespace=self._namespace)
        except rest.ApiException as e:
            # API errors are not fatal here; keep polling until the deadline.
            logging.info('Continue polling after getting ApiException(%s)', e)
        else:
            # Pod phase is one of Pending, Running, Succeeded, Failed, or
            # Unknown. Succeeded and Failed indicate the pod lifecycle has
            # reached its end, while we expect the job to be running and
            # hanging. Phase is Unknown if the state of the pod could not be
            # obtained, thus we can wait until we confirm the phase.
            # NOTE(review): presumably _PodPhase(...) raises if the reported
            # phase is missing or unrecognized -- confirm against _PodPhase.
            pod_phase = _PodPhase(pod.status.phase)
            # A Running pod without an IP is not reachable yet; keep waiting.
            if pod_phase == _PodPhase.RUNNING and pod.status.pod_ip:
                self._endpoint = '{}:{}'.format(
                    pod.status.pod_ip, self._serving_binary.container_port)
                return
            if pod_phase.is_done:
                raise error_types.JobAborted(
                    'Job has been aborted. (phase={})'.format(pod_phase))
            logging.info(
                'Waiting for the pod to be running. (phase=%s)', pod_phase)

        # Sleep for one polling interval, but never past the deadline, so that
        # DeadlineExceeded is raised on time instead of up to one interval late.
        remaining_sec = max(0.0, deadline - time.time())
        time.sleep(min(_DEFAULT_POLLING_INTERVAL_SEC, remaining_sec))

    raise error_types.DeadlineExceeded(
        'Deadline exceeded while waiting for pod to be running.')