def _wait_for_workers()

in src/sagemaker_training/smdataparallel.py [0:0]


    def _wait_for_workers(self):  # type: () -> None
        """Block until every non-master host can be connected to.

        Each host in ``self._hosts`` other than ``self._master_hostname`` is
        polled in turn with ``_can_connect``, sleeping ``self._interval``
        seconds between attempts. The whole polling loop (all workers
        together, not each one individually) runs inside a single
        ``timeout.timeout(seconds=self.timeout_in_seconds)`` context.

        Raises:
            timeout.TimeoutError: Propagated from the timeout context when the
                workers do not all become reachable in time. The error is
                logged, together with the workers not yet confirmed
                reachable, and then re-raised unchanged.
        """
        logger.info("Waiting for MPI workers to establish their SSH connections")

        workers = [host for host in self._hosts if host != self._master_hostname]
        # Workers not yet confirmed reachable; reported if the wait times out
        # so the operator can see which host(s) blocked the start-up.
        pending = list(workers)
        try:
            with timeout.timeout(seconds=self.timeout_in_seconds):
                for host in workers:
                    while not _can_connect(host):
                        time.sleep(self._interval)
                    pending.remove(host)
                    logger.info("Worker %s available for communication", host)
        except timeout.TimeoutError:
            logger.exception(
                "Connection between the hosts couldn't be established "
                "(workers not yet reachable: %s). Aborting the training.",
                pending,
            )
            raise