src/sagemaker_training/mpi.py [430:446]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        # Name of the status file: the shared prefix plus the master's hostname.
        status_file = MPI_FINISHED_STATUS_FILE + "." + self._master_hostname
        for host in self._hosts:
            # The master itself is skipped; only the other hosts get the file.
            if host == self._master_hostname:
                continue

            # One initial attempt, followed by at most five retries spaced
            # one second apart. A falsy result is treated as a failed write.
            status = _write_status_file(host, status_file)
            for _ in range(5):
                if status:
                    break
                logger.info(f"Retry creating status file onto {host}")
                time.sleep(1)
                status = _write_status_file(host, status_file)

            # Best effort: a host that never accepted the file is only logged.
            if not status:
                logger.info(f"Failed to create status file onto {host}")

        # Fixed 30-second pause once every host has been attempted.
        # NOTE(review): presumably lets the other hosts observe the status
        # file before this process moves on -- confirm against the caller.
        time.sleep(30)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



src/sagemaker_training/smdataparallel.py [341:357]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        # Name of the status file: the shared prefix plus the master's hostname.
        status_file = MPI_FINISHED_STATUS_FILE + "." + self._master_hostname
        for host in self._hosts:
            # The master itself is skipped; only the other hosts get the file.
            if host == self._master_hostname:
                continue

            # One initial attempt, followed by at most five retries spaced
            # one second apart. A falsy result is treated as a failed write.
            status = _write_status_file(host, status_file)
            for _ in range(5):
                if status:
                    break
                logger.info(f"Retry creating status file onto {host}")
                time.sleep(1)
                status = _write_status_file(host, status_file)

            # Best effort: a host that never accepted the file is only logged.
            if not status:
                logger.info(f"Failed to create status file onto {host}")

        # Fixed 30-second pause once every host has been attempted.
        # NOTE(review): presumably lets the other hosts observe the status
        # file before this process moves on -- confirm against the caller.
        time.sleep(30)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



