common/sagemaker_rl/orchestrator/workflow/manager/experiment_manager.py [1594:1630]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                logger.error(e)
                pass

        # Wait until the experiment record reports that the model identified by
        # `next_model_to_train_id` has finished training.
        if self.local_mode or wait:
            # Upper bound on polls; with the backoff below the waits are
            # 2, 4, 8, 16 and 32 seconds.
            max_retries = 5

            def _is_trained():
                """Return True once the record shows this model as TRAINED."""
                # Re-read self.experiment_record on every call so a record
                # replaced during a sync is picked up.
                record = self.experiment_record
                return (
                    record._training_state == TrainingState.TRAINED
                    and record._last_trained_model_id == next_model_to_train_id
                    and record._next_model_to_train_id is None
                )

            trained_state = _is_trained()
            num_retries = 0

            while not trained_state:
                # Sync experiment state if required
                self._sync_experiment_state_with_ddb()
                logger.debug("Waiting for experiment table training status to be updated...")
                time.sleep(2 * (2 ** num_retries))
                trained_state = _is_trained()
                num_retries += 1

                # Success on this poll (including the last allowed one) must
                # not be reported as an error.
                if trained_state:
                    break

                training_state = self.experiment_record._training_state
                # Report a terminal failure before the generic retry-limit
                # error so the more specific exception is raised.
                if training_state in (TrainingState.FAILED, TrainingState.STOPPED):
                    raise SageMakerTrainingJobException(
                        f"Training job '{next_model_to_train_id}' "
                        f"ended in state of '{training_state}'. Please check Sagemaker logs for "
                        "more information."
                    )
                if num_retries >= max_retries:
                    raise UnhandledWorkflowException(
                        f"Training job '{next_model_to_train_id}' "
                        f"was in state of '{training_state}'. Expected it to be TRAINED."
                    )
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



common/sagemaker_rl/orchestrator/workflow/manager/experiment_manager.py [1711:1747]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                logger.error(e)
                pass

        # Wait until the experiment record reports that the model identified by
        # `next_model_to_train_id` has finished training.
        if self.local_mode or wait:
            # Upper bound on polls; with the backoff below the waits are
            # 2, 4, 8, 16 and 32 seconds.
            max_retries = 5

            def _is_trained():
                """Return True once the record shows this model as TRAINED."""
                # Re-read self.experiment_record on every call so a record
                # replaced during a sync is picked up.
                record = self.experiment_record
                return (
                    record._training_state == TrainingState.TRAINED
                    and record._last_trained_model_id == next_model_to_train_id
                    and record._next_model_to_train_id is None
                )

            trained_state = _is_trained()
            num_retries = 0

            while not trained_state:
                # Sync experiment state if required
                self._sync_experiment_state_with_ddb()
                logger.debug("Waiting for experiment table training status to be updated...")
                time.sleep(2 * (2 ** num_retries))
                trained_state = _is_trained()
                num_retries += 1

                # Success on this poll (including the last allowed one) must
                # not be reported as an error.
                if trained_state:
                    break

                training_state = self.experiment_record._training_state
                # Report a terminal failure before the generic retry-limit
                # error so the more specific exception is raised.
                if training_state in (TrainingState.FAILED, TrainingState.STOPPED):
                    raise SageMakerTrainingJobException(
                        f"Training job '{next_model_to_train_id}' "
                        f"ended in state of '{training_state}'. Please check Sagemaker logs for "
                        "more information."
                    )
                if num_retries >= max_retries:
                    raise UnhandledWorkflowException(
                        f"Training job '{next_model_to_train_id}' "
                        f"was in state of '{training_state}'. Expected it to be TRAINED."
                    )
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



