common/sagemaker_rl/orchestrator/workflow/manager/experiment_manager.py [1396:1424]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        succeeded_state = (
            self.experiment_record._joining_state == JoiningState.SUCCEEDED
            and self.experiment_record._last_joined_job_id == next_join_job_id
            and self.experiment_record._next_join_job_id is None
        )
        num_retries = 0

        while not succeeded_state:
            # Sync experiment state if required
            self._sync_experiment_state_with_ddb()
            logger.debug("Waiting for experiment table joining status to be updated...")
            time.sleep(2 * (2 ** num_retries))
            succeeded_state = (
                self.experiment_record._joining_state == JoiningState.SUCCEEDED
                and self.experiment_record._last_joined_job_id == next_join_job_id
                and self.experiment_record._next_join_job_id is None
            )
            num_retries += 1
            if num_retries >= 5:
                raise UnhandledWorkflowException(
                    f"Joining job '{self.experiment_record._next_join_job_id}' "
                    f"was in state of '{self.experiment_record._joining_state}'. Failed to sync table states."
                )
            if (
                self.experiment_record._joining_state == JoiningState.FAILED
                or self.experiment_record._joining_state == JoiningState.CANCELLED
            ):
                raise WorkflowJoiningJobException(
                    f"Joining job '{self.experiment_record._next_join_job_id}' "
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -


common/sagemaker_rl/orchestrator/workflow/manager/experiment_manager.py [1492:1520]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
            succeeded_state = (
                self.experiment_record._joining_state == JoiningState.SUCCEEDED
                and self.experiment_record._last_joined_job_id == next_join_job_id
                and self.experiment_record._next_join_job_id is None
            )
            num_retries = 0

            while not succeeded_state:
                # Sync experiment state if required
                self._sync_experiment_state_with_ddb()
                logger.debug("Waiting for experiment table joining status to be updated...")
                time.sleep(2 * (2 ** num_retries))
                succeeded_state = (
                    self.experiment_record._joining_state == JoiningState.SUCCEEDED
                    and self.experiment_record._last_joined_job_id == next_join_job_id
                    and self.experiment_record._next_join_job_id is None
                )
                num_retries += 1
                if num_retries >= 5:
                    raise UnhandledWorkflowException(
                        f"Joining job '{self.experiment_record._next_join_job_id}' "
                        f"was in state of '{self.experiment_record._joining_state}'. Failed to sync table states."
                    )
                if (
                    self.experiment_record._joining_state == JoiningState.FAILED
                    or self.experiment_record._joining_state == JoiningState.CANCELLED
                ):
                    raise WorkflowJoiningJobException(
                        f"Joining job '{self.experiment_record._next_join_job_id}' "
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -